{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 导包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据读取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the attribute table from the Excel workbook.\n",
    "# pd.read_excel already returns a DataFrame, so no pd.DataFrame(...) re-wrap is needed.\n",
    "df = pd.read_excel(r'./使用数据/小班属性表.xls')\n",
    "\n",
    "# Backward-compatible aliases for the names this cell has always defined, so any\n",
    "# later cell that still refers to them keeps working.\n",
    "# NOTE: `file_csv` is a historical name; the source is an .xls workbook, not a CSV.\n",
    "file_csv = data = df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据预览"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(338, 87)"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape  # (number of rows, number of columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡向</th>\n",
       "      <th>...</th>\n",
       "      <th>主体功能区</th>\n",
       "      <th>土地利用现状地类</th>\n",
       "      <th>林木生长势</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>重要生态功能区</th>\n",
       "      <th>土地使用权</th>\n",
       "      <th>调查时间</th>\n",
       "      <th>连接字段</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>G360726050609901</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>99</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>484</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15</td>\n",
       "      <td>1.1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>9484092A-A0D4-4F8A-B6E4-4EF65F536117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>G360726050612301</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>123</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>613</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>16</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>DCAED839-5ED1-450D-BDD7-79CE74AB37AB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>G360726050602802</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>28</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>420</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>45</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-22</td>\n",
       "      <td>acc8ae69-6c3a-41f3-9f1c-25db74ec0488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>G360726050609801</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>98</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>441</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>48B0AABF-2026-4E2C-A686-BBDA04204B45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>G360726050609701</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>97</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>432</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>25</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-18</td>\n",
       "      <td>4F321F37-E249-49D5-A3BA-AB47E7CFC082</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 87 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 id  调查方式  乡镇场  村分场  小班号  细班号  流域名称  地貌类型  平均海拔高   坡向  ...  \\\n",
       "0  G360726050609901   1.0    5    6   99    1     1    51    484  1.0  ...   \n",
       "1  G360726050612301   1.0    5    6  123    1     1    40    613  1.0  ...   \n",
       "2  G360726050602802   NaN    5    6   28    2     1    51    420  3.0  ...   \n",
       "3  G360726050609801   1.0    5    6   98    1     1    51    441  3.0  ...   \n",
       "4  G360726050609701   1.0    5    6   97    1     1    51    432  3.0  ...   \n",
       "\n",
       "   主体功能区  土地利用现状地类  林木生长势  灌木覆盖度  灌木平均高  其它  重要生态功能区  土地使用权        调查时间  \\\n",
       "0    3.0     301.0    1.0     15    1.1   0      NaN      1  2019-09-27   \n",
       "1    3.0     301.0    1.0     16    1.5   0      NaN      1  2019-09-27   \n",
       "2    3.0     301.0    1.0     45    1.6   0      NaN      1  2019-09-22   \n",
       "3    3.0     301.0    1.0     21    1.5   0      NaN      1  2019-09-27   \n",
       "4    3.0     301.0    1.0     25    1.5   0      NaN      1  2019-09-18   \n",
       "\n",
       "                                   连接字段  \n",
       "0  9484092A-A0D4-4F8A-B6E4-4EF65F536117  \n",
       "1  DCAED839-5ED1-450D-BDD7-79CE74AB37AB  \n",
       "2  acc8ae69-6c3a-41f3-9f1c-25db74ec0488  \n",
       "3  48B0AABF-2026-4E2C-A686-BBDA04204B45  \n",
       "4  4F321F37-E249-49D5-A3BA-AB47E7CFC082  \n",
       "\n",
       "[5 rows x 87 columns]"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()  # first five rows (head() defaults to n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡向</th>\n",
       "      <th>...</th>\n",
       "      <th>主体功能区</th>\n",
       "      <th>土地利用现状地类</th>\n",
       "      <th>林木生长势</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>重要生态功能区</th>\n",
       "      <th>土地使用权</th>\n",
       "      <th>调查时间</th>\n",
       "      <th>连接字段</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>333</th>\n",
       "      <td>G360726060301204</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>270</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-16</td>\n",
       "      <td>0C495930-8DE1-4C80-AD55-8B254FD03B05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334</th>\n",
       "      <td>G360726060301301</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>13</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>350</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>35</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-16</td>\n",
       "      <td>E3E6F4B3-0BF9-46CA-9191-2CAEEFFE20EC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>335</th>\n",
       "      <td>G360726060300403</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>310</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-18</td>\n",
       "      <td>946F45ED-C89A-42E4-9F67-A48C2D2E5583</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>336</th>\n",
       "      <td>G360726060300902</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>300</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>25</td>\n",
       "      <td>1.2</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-12</td>\n",
       "      <td>8C313EF1-7D64-4892-B33E-F8459847F299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>337</th>\n",
       "      <td>G360726060300803</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>290</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-16</td>\n",
       "      <td>2392C5AE-51D7-4676-98E8-947DF46D7D09</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 87 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                   id  调查方式  乡镇场  村分场  小班号  细班号  流域名称  地貌类型  平均海拔高   坡向  ...  \\\n",
       "333  G360726060301204   NaN    6    3   12    4     1    51    270  8.0  ...   \n",
       "334  G360726060301301   NaN    6    3   13    1     1    51    350  8.0  ...   \n",
       "335  G360726060300403   NaN    6    3    4    3     1    51    310  4.0  ...   \n",
       "336  G360726060300902   4.0    6    3    9    2     1    51    300  6.0  ...   \n",
       "337  G360726060300803   NaN    6    3    8    3     1    51    290  4.0  ...   \n",
       "\n",
       "     主体功能区  土地利用现状地类  林木生长势  灌木覆盖度  灌木平均高  其它  重要生态功能区  土地使用权        调查时间  \\\n",
       "333    3.0     201.0    NaN     35    1.5   0      NaN      1  2019-09-16   \n",
       "334    3.0     201.0    2.0     35    1.8   0      NaN      1  2019-09-16   \n",
       "335    3.0     201.0    NaN     35    1.0   0      NaN      1  2019-09-18   \n",
       "336    3.0     301.0    2.0     25    1.2   0      NaN      1  2019-09-12   \n",
       "337    3.0     201.0    NaN     35    1.5   0      NaN      1  2019-09-16   \n",
       "\n",
       "                                     连接字段  \n",
       "333  0C495930-8DE1-4C80-AD55-8B254FD03B05  \n",
       "334  E3E6F4B3-0BF9-46CA-9191-2CAEEFFE20EC  \n",
       "335  946F45ED-C89A-42E4-9F67-A48C2D2E5583  \n",
       "336  8C313EF1-7D64-4892-B33E-F8459847F299  \n",
       "337  2392C5AE-51D7-4676-98E8-947DF46D7D09  \n",
       "\n",
       "[5 rows x 87 columns]"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.tail()  # last five rows (tail() defaults to n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 338 entries, 0 to 337\n",
      "Data columns (total 87 columns):\n",
      " #   Column      Non-Null Count  Dtype  \n",
      "---  ------      --------------  -----  \n",
      " 0   id          338 non-null    object \n",
      " 1   调查方式        165 non-null    float64\n",
      " 2   乡镇场         338 non-null    int64  \n",
      " 3   村分场         338 non-null    int64  \n",
      " 4   小班号         338 non-null    int64  \n",
      " 5   细班号         338 non-null    int64  \n",
      " 6   流域名称        338 non-null    int64  \n",
      " 7   地貌类型        338 non-null    int64  \n",
      " 8   平均海拔高       338 non-null    int64  \n",
      " 9   坡向          336 non-null    float64\n",
      " 10  坡位          336 non-null    float64\n",
      " 11  坡度          338 non-null    int64  \n",
      " 12  成土母岩        338 non-null    int64  \n",
      " 13  土壤名称        338 non-null    int64  \n",
      " 14  土层厚度        338 non-null    int64  \n",
      " 15  腐殖质厚度       338 non-null    int64  \n",
      " 16  植被总盖度       338 non-null    int64  \n",
      " 17  水土流失类型      320 non-null    float64\n",
      " 18  水土流失强度      320 non-null    float64\n",
      " 19  土地所有权       338 non-null    int64  \n",
      " 20  林木所有权       327 non-null    float64\n",
      " 21  林木使用权       327 non-null    float64\n",
      " 22  面积亩         338 non-null    float64\n",
      " 23  地类          338 non-null    int64  \n",
      " 24  林种          322 non-null    float64\n",
      " 25  经营类型        322 non-null    float64\n",
      " 26  起源          319 non-null    float64\n",
      " 27  优势树种        319 non-null    float64\n",
      " 28  平均年龄        338 non-null    int64  \n",
      " 29  龄组          311 non-null    float64\n",
      " 30  平均胸径        338 non-null    float64\n",
      " 31  平均树高        338 non-null    float64\n",
      " 32  郁闭度         338 non-null    float64\n",
      " 33  活立木总蓄积      338 non-null    int64  \n",
      " 34  林分蓄积        338 non-null    int64  \n",
      " 35  散生蓄积        338 non-null    int64  \n",
      " 36  四旁树蓄积       338 non-null    int64  \n",
      " 37  四旁树株数       338 non-null    int64  \n",
      " 38  杉类占         338 non-null    int64  \n",
      " 39  松类占         338 non-null    int64  \n",
      " 40  硬阔类占        338 non-null    int64  \n",
      " 41  软阔类占        338 non-null    int64  \n",
      " 42  树种比         338 non-null    object \n",
      " 43  枯立木蓄积       338 non-null    int64  \n",
      " 44  毛竹株数        338 non-null    int64  \n",
      " 45  幼龄毛竹株数      338 non-null    int64  \n",
      " 46  壮龄毛竹株数      338 non-null    int64  \n",
      " 47  杂竹株数        338 non-null    int64  \n",
      " 48  造林类型        0 non-null      float64\n",
      " 49  公益林事权       199 non-null    float64\n",
      " 50  公益林区位类型     199 non-null    float64\n",
      " 51  公益林区域名称     199 non-null    float64\n",
      " 52  群落结构类型      323 non-null    float64\n",
      " 53  林层结构        255 non-null    float64\n",
      " 54  树种结构        256 non-null    float64\n",
      " 55  病虫害等级       42 non-null     float64\n",
      " 56  火灾等级        29 non-null     float64\n",
      " 57  其它灾害等级      76 non-null     float64\n",
      " 58  森林健康等级      312 non-null    float64\n",
      " 59  森林自然度       324 non-null    float64\n",
      " 60  人工林生长等级     32 non-null     float64\n",
      " 61  天然更新等级      87 non-null     float64\n",
      " 62  用材林可及度      10 non-null     float64\n",
      " 63  经营措施类型      325 non-null    float64\n",
      " 64  大等于5CM亩均株数  338 non-null    int64  \n",
      " 65  小于5CM亩均株数   338 non-null    int64  \n",
      " 66  亩平均蓄积       338 non-null    float64\n",
      " 67  散生木株数       338 non-null    int64  \n",
      " 68  森林类别        322 non-null    float64\n",
      " 69  交通区位等级      327 non-null    float64\n",
      " 70  林地质量等级      321 non-null    float64\n",
      " 71  林地保护等级      325 non-null    float64\n",
      " 72  土地退化类型      323 non-null    float64\n",
      " 73  国家级公益林保护等级  85 non-null     float64\n",
      " 74  林带长度        338 non-null    int64  \n",
      " 75  林带宽度        338 non-null    int64  \n",
      " 76  林地功能分区      321 non-null    object \n",
      " 77  主体功能区       321 non-null    float64\n",
      " 78  土地利用现状地类    326 non-null    float64\n",
      " 79  林木生长势       132 non-null    float64\n",
      " 80  灌木覆盖度       338 non-null    int64  \n",
      " 81  灌木平均高       338 non-null    float64\n",
      " 82  其它          338 non-null    int64  \n",
      " 83  重要生态功能区     0 non-null      float64\n",
      " 84  土地使用权       338 non-null    int64  \n",
      " 85  调查时间        332 non-null    object \n",
      " 86  连接字段        338 non-null    object \n",
      "dtypes: float64(44), int64(38), object(5)\n",
      "memory usage: 229.9+ KB\n"
     ]
    }
   ],
   "source": [
    "df.info()  # per-column overview: non-null counts and dtypes, plus memory usage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['id', '调查方式', '乡镇场', '村分场', '小班号', '细班号', '流域名称', '地貌类型', '平均海拔高', '坡向',\n",
       "       '坡位', '坡度', '成土母岩', '土壤名称', '土层厚度', '腐殖质厚度', '植被总盖度', '水土流失类型',\n",
       "       '水土流失强度', '土地所有权', '林木所有权', '林木使用权', '面积亩', '地类', '林种', '经营类型', '起源',\n",
       "       '优势树种', '平均年龄', '龄组', '平均胸径', '平均树高', '郁闭度', '活立木总蓄积', '林分蓄积', '散生蓄积',\n",
       "       '四旁树蓄积', '四旁树株数', '杉类占', '松类占', '硬阔类占', '软阔类占', '树种比', '枯立木蓄积', '毛竹株数',\n",
       "       '幼龄毛竹株数', '壮龄毛竹株数', '杂竹株数', '造林类型', '公益林事权', '公益林区位类型', '公益林区域名称',\n",
       "       '群落结构类型', '林层结构', '树种结构', '病虫害等级', '火灾等级', '其它灾害等级', '森林健康等级', '森林自然度',\n",
       "       '人工林生长等级', '天然更新等级', '用材林可及度', '经营措施类型', '大等于5CM亩均株数', '小于5CM亩均株数',\n",
       "       '亩平均蓄积', '散生木株数', '森林类别', '交通区位等级', '林地质量等级', '林地保护等级', '土地退化类型',\n",
       "       '国家级公益林保护等级', '林带长度', '林带宽度', '林地功能分区', '主体功能区', '土地利用现状地类', '林木生长势',\n",
       "       '灌木覆盖度', '灌木平均高', '其它', '重要生态功能区', '土地使用权', '调查时间', '连接字段'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns  # labels of all columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "id          object\n",
       "调查方式       float64\n",
       "乡镇场          int64\n",
       "村分场          int64\n",
       "小班号          int64\n",
       "            ...   \n",
       "其它           int64\n",
       "重要生态功能区    float64\n",
       "土地使用权        int64\n",
       "调查时间        object\n",
       "连接字段        object\n",
       "Length: 87, dtype: object"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes  # dtype of every column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('int64')"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['乡镇场'].dtype  # dtype of a single column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡向</th>\n",
       "      <th>坡位</th>\n",
       "      <th>...</th>\n",
       "      <th>林带长度</th>\n",
       "      <th>林带宽度</th>\n",
       "      <th>主体功能区</th>\n",
       "      <th>土地利用现状地类</th>\n",
       "      <th>林木生长势</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>重要生态功能区</th>\n",
       "      <th>土地使用权</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>165.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.0</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>336.000000</td>\n",
       "      <td>336.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>338.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>321.000000</td>\n",
       "      <td>326.000000</td>\n",
       "      <td>132.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>338.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.181818</td>\n",
       "      <td>5.565089</td>\n",
       "      <td>5.964497</td>\n",
       "      <td>39.863905</td>\n",
       "      <td>2.952663</td>\n",
       "      <td>1.0</td>\n",
       "      <td>49.535503</td>\n",
       "      <td>416.284024</td>\n",
       "      <td>5.080357</td>\n",
       "      <td>6.657738</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.950156</td>\n",
       "      <td>261.306748</td>\n",
       "      <td>1.250000</td>\n",
       "      <td>23.976331</td>\n",
       "      <td>1.089941</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.032544</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.717998</td>\n",
       "      <td>0.496480</td>\n",
       "      <td>2.482923</td>\n",
       "      <td>41.217442</td>\n",
       "      <td>2.438102</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.742480</td>\n",
       "      <td>110.947748</td>\n",
       "      <td>2.415640</td>\n",
       "      <td>1.148185</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.322115</td>\n",
       "      <td>64.955685</td>\n",
       "      <td>0.515041</td>\n",
       "      <td>15.080379</td>\n",
       "      <td>0.587571</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.177704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>40.000000</td>\n",
       "      <td>160.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>103.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>9.250000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>350.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>201.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>397.500000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>301.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>1.300000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>56.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>463.750000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>301.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>35.000000</td>\n",
       "      <td>1.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>11.000000</td>\n",
       "      <td>146.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>990.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1005.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>65.000000</td>\n",
       "      <td>2.700000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 82 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             调查方式         乡镇场         村分场         小班号         细班号   流域名称  \\\n",
       "count  165.000000  338.000000  338.000000  338.000000  338.000000  338.0   \n",
       "mean     1.181818    5.565089    5.964497   39.863905    2.952663    1.0   \n",
       "std      0.717998    0.496480    2.482923   41.217442    2.438102    0.0   \n",
       "min      1.000000    5.000000    2.000000    1.000000    1.000000    1.0   \n",
       "25%      1.000000    5.000000    4.000000    9.250000    1.000000    1.0   \n",
       "50%      1.000000    6.000000    6.000000   22.000000    2.000000    1.0   \n",
       "75%      1.000000    6.000000    7.000000   56.000000    4.000000    1.0   \n",
       "max      4.000000    6.000000   11.000000  146.000000   14.000000    1.0   \n",
       "\n",
       "             地貌类型       平均海拔高          坡向          坡位  ...   林带长度   林带宽度  \\\n",
       "count  338.000000  338.000000  336.000000  336.000000  ...  338.0  338.0   \n",
       "mean    49.535503  416.284024    5.080357    6.657738  ...    0.0    0.0   \n",
       "std      3.742480  110.947748    2.415640    1.148185  ...    0.0    0.0   \n",
       "min     40.000000  160.000000    1.000000    1.000000  ...    0.0    0.0   \n",
       "25%     51.000000  350.000000    3.000000    7.000000  ...    0.0    0.0   \n",
       "50%     51.000000  397.500000    5.000000    7.000000  ...    0.0    0.0   \n",
       "75%     51.000000  463.750000    8.000000    7.000000  ...    0.0    0.0   \n",
       "max     51.000000  990.000000    9.000000    7.000000  ...    0.0    0.0   \n",
       "\n",
       "            主体功能区     土地利用现状地类       林木生长势       灌木覆盖度       灌木平均高     其它  \\\n",
       "count  321.000000   326.000000  132.000000  338.000000  338.000000  338.0   \n",
       "mean     2.950156   261.306748    1.250000   23.976331    1.089941    0.0   \n",
       "std      0.322115    64.955685    0.515041   15.080379    0.587571    0.0   \n",
       "min      1.000000   103.000000    1.000000    0.000000    0.000000    0.0   \n",
       "25%      3.000000   201.000000    1.000000   15.000000    1.000000    0.0   \n",
       "50%      3.000000   301.000000    1.000000   25.000000    1.300000    0.0   \n",
       "75%      3.000000   301.000000    1.000000   35.000000    1.500000    0.0   \n",
       "max      4.000000  1005.000000    4.000000   65.000000    2.700000    0.0   \n",
       "\n",
       "       重要生态功能区       土地使用权  \n",
       "count      0.0  338.000000  \n",
       "mean       NaN    1.032544  \n",
       "std        NaN    0.177704  \n",
       "min        NaN    1.000000  \n",
       "25%        NaN    1.000000  \n",
       "50%        NaN    1.000000  \n",
       "75%        NaN    1.000000  \n",
       "max        NaN    2.000000  \n",
       "\n",
       "[8 rows x 82 columns]"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()  # 描述性统计：非空计数(count)、均值(mean)、标准差(std)、最小值(min)、下四分位数(25%)、中位数(50%)、上四分位数(75%)、最大值(max)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡向</th>\n",
       "      <th>坡位</th>\n",
       "      <th>...</th>\n",
       "      <th>林带长度</th>\n",
       "      <th>林带宽度</th>\n",
       "      <th>主体功能区</th>\n",
       "      <th>土地利用现状地类</th>\n",
       "      <th>林木生长势</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>重要生态功能区</th>\n",
       "      <th>土地使用权</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>165.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.0</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>336.000000</td>\n",
       "      <td>336.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>338.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>321.000000</td>\n",
       "      <td>326.000000</td>\n",
       "      <td>132.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>338.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.181818</td>\n",
       "      <td>5.565089</td>\n",
       "      <td>5.964497</td>\n",
       "      <td>39.863905</td>\n",
       "      <td>2.952663</td>\n",
       "      <td>1.0</td>\n",
       "      <td>49.535503</td>\n",
       "      <td>416.284024</td>\n",
       "      <td>5.080357</td>\n",
       "      <td>6.657738</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.950156</td>\n",
       "      <td>261.306748</td>\n",
       "      <td>1.250000</td>\n",
       "      <td>23.976331</td>\n",
       "      <td>1.089941</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.032544</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.717998</td>\n",
       "      <td>0.496480</td>\n",
       "      <td>2.482923</td>\n",
       "      <td>41.217442</td>\n",
       "      <td>2.438102</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.742480</td>\n",
       "      <td>110.947748</td>\n",
       "      <td>2.415640</td>\n",
       "      <td>1.148185</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.322115</td>\n",
       "      <td>64.955685</td>\n",
       "      <td>0.515041</td>\n",
       "      <td>15.080379</td>\n",
       "      <td>0.587571</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.177704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>40.000000</td>\n",
       "      <td>160.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>103.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>9.250000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>350.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>201.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>397.500000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>301.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>1.300000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>56.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>463.750000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>301.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>35.000000</td>\n",
       "      <td>1.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>11.000000</td>\n",
       "      <td>146.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>990.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1005.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>65.000000</td>\n",
       "      <td>2.700000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 82 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             调查方式         乡镇场         村分场         小班号         细班号   流域名称  \\\n",
       "count  165.000000  338.000000  338.000000  338.000000  338.000000  338.0   \n",
       "mean     1.181818    5.565089    5.964497   39.863905    2.952663    1.0   \n",
       "std      0.717998    0.496480    2.482923   41.217442    2.438102    0.0   \n",
       "min      1.000000    5.000000    2.000000    1.000000    1.000000    1.0   \n",
       "25%      1.000000    5.000000    4.000000    9.250000    1.000000    1.0   \n",
       "50%      1.000000    6.000000    6.000000   22.000000    2.000000    1.0   \n",
       "75%      1.000000    6.000000    7.000000   56.000000    4.000000    1.0   \n",
       "max      4.000000    6.000000   11.000000  146.000000   14.000000    1.0   \n",
       "\n",
       "             地貌类型       平均海拔高          坡向          坡位  ...   林带长度   林带宽度  \\\n",
       "count  338.000000  338.000000  336.000000  336.000000  ...  338.0  338.0   \n",
       "mean    49.535503  416.284024    5.080357    6.657738  ...    0.0    0.0   \n",
       "std      3.742480  110.947748    2.415640    1.148185  ...    0.0    0.0   \n",
       "min     40.000000  160.000000    1.000000    1.000000  ...    0.0    0.0   \n",
       "25%     51.000000  350.000000    3.000000    7.000000  ...    0.0    0.0   \n",
       "50%     51.000000  397.500000    5.000000    7.000000  ...    0.0    0.0   \n",
       "75%     51.000000  463.750000    8.000000    7.000000  ...    0.0    0.0   \n",
       "max     51.000000  990.000000    9.000000    7.000000  ...    0.0    0.0   \n",
       "\n",
       "            主体功能区     土地利用现状地类       林木生长势       灌木覆盖度       灌木平均高     其它  \\\n",
       "count  321.000000   326.000000  132.000000  338.000000  338.000000  338.0   \n",
       "mean     2.950156   261.306748    1.250000   23.976331    1.089941    0.0   \n",
       "std      0.322115    64.955685    0.515041   15.080379    0.587571    0.0   \n",
       "min      1.000000   103.000000    1.000000    0.000000    0.000000    0.0   \n",
       "25%      3.000000   201.000000    1.000000   15.000000    1.000000    0.0   \n",
       "50%      3.000000   301.000000    1.000000   25.000000    1.300000    0.0   \n",
       "75%      3.000000   301.000000    1.000000   35.000000    1.500000    0.0   \n",
       "max      4.000000  1005.000000    4.000000   65.000000    2.700000    0.0   \n",
       "\n",
       "       重要生态功能区       土地使用权  \n",
       "count      0.0  338.000000  \n",
       "mean       NaN    1.032544  \n",
       "std        NaN    0.177704  \n",
       "min        NaN    1.000000  \n",
       "25%        NaN    1.000000  \n",
       "50%        NaN    1.000000  \n",
       "75%        NaN    1.000000  \n",
       "max        NaN    2.000000  \n",
       "\n",
       "[8 rows x 82 columns]"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()  # 描述性统计：非空计数(count)、均值(mean)、标准差(std)、最小值(min)、下四分位数(25%)、中位数(50%)、上四分位数(75%)、最大值(max)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    338.000000\n",
       "mean      94.849704\n",
       "std       98.816748\n",
       "min        1.300000\n",
       "25%       17.400000\n",
       "50%       53.550000\n",
       "75%      146.000000\n",
       "max      482.100000\n",
       "Name: 面积亩, dtype: float64"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['面积亩'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['id', '调查方式', '乡镇场', '村分场', '小班号', '细班号', '流域名称', '地貌类型', '平均海拔高', '坡向',\n",
       "       '坡位', '坡度', '成土母岩', '土壤名称', '土层厚度', '腐殖质厚度', '植被总盖度', '水土流失类型',\n",
       "       '水土流失强度', '土地所有权', '林木所有权', '林木使用权', '面积亩', '地类', '林种', '经营类型', '起源',\n",
       "       '优势树种', '平均年龄', '龄组', '平均胸径', '平均树高', '郁闭度', '活立木总蓄积', '林分蓄积', '散生蓄积',\n",
       "       '四旁树蓄积', '四旁树株数', '杉类占', '松类占', '硬阔类占', '软阔类占', '树种比', '枯立木蓄积', '毛竹株数',\n",
       "       '幼龄毛竹株数', '壮龄毛竹株数', '杂竹株数', '造林类型', '公益林事权', '公益林区位类型', '公益林区域名称',\n",
       "       '群落结构类型', '林层结构', '树种结构', '病虫害等级', '火灾等级', '其它灾害等级', '森林健康等级', '森林自然度',\n",
       "       '人工林生长等级', '天然更新等级', '用材林可及度', '经营措施类型', '大等于5CM亩均株数', '小于5CM亩均株数',\n",
       "       '亩平均蓄积', '散生木株数', '森林类别', '交通区位等级', '林地质量等级', '林地保护等级', '土地退化类型',\n",
       "       '国家级公益林保护等级', '林带长度', '林带宽度', '林地功能分区', '主体功能区', '土地利用现状地类', '林木生长势',\n",
       "       '灌木覆盖度', '灌木平均高', '其它', '重要生态功能区', '土地使用权', '调查时间', '连接字段'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns#查看数据列名称"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(338, 87)"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape#查看行列数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 338 entries, 0 to 337\n",
      "Data columns (total 87 columns):\n",
      " #   Column      Non-Null Count  Dtype  \n",
      "---  ------      --------------  -----  \n",
      " 0   id          338 non-null    object \n",
      " 1   调查方式        165 non-null    float64\n",
      " 2   乡镇场         338 non-null    int64  \n",
      " 3   村分场         338 non-null    int64  \n",
      " 4   小班号         338 non-null    int64  \n",
      " 5   细班号         338 non-null    int64  \n",
      " 6   流域名称        338 non-null    int64  \n",
      " 7   地貌类型        338 non-null    int64  \n",
      " 8   平均海拔高       338 non-null    int64  \n",
      " 9   坡向          336 non-null    float64\n",
      " 10  坡位          336 non-null    float64\n",
      " 11  坡度          338 non-null    int64  \n",
      " 12  成土母岩        338 non-null    int64  \n",
      " 13  土壤名称        338 non-null    int64  \n",
      " 14  土层厚度        338 non-null    int64  \n",
      " 15  腐殖质厚度       338 non-null    int64  \n",
      " 16  植被总盖度       338 non-null    int64  \n",
      " 17  水土流失类型      320 non-null    float64\n",
      " 18  水土流失强度      320 non-null    float64\n",
      " 19  土地所有权       338 non-null    int64  \n",
      " 20  林木所有权       327 non-null    float64\n",
      " 21  林木使用权       327 non-null    float64\n",
      " 22  面积亩         338 non-null    float64\n",
      " 23  地类          338 non-null    int64  \n",
      " 24  林种          322 non-null    float64\n",
      " 25  经营类型        322 non-null    float64\n",
      " 26  起源          319 non-null    float64\n",
      " 27  优势树种        319 non-null    float64\n",
      " 28  平均年龄        338 non-null    int64  \n",
      " 29  龄组          311 non-null    float64\n",
      " 30  平均胸径        338 non-null    float64\n",
      " 31  平均树高        338 non-null    float64\n",
      " 32  郁闭度         338 non-null    float64\n",
      " 33  活立木总蓄积      338 non-null    int64  \n",
      " 34  林分蓄积        338 non-null    int64  \n",
      " 35  散生蓄积        338 non-null    int64  \n",
      " 36  四旁树蓄积       338 non-null    int64  \n",
      " 37  四旁树株数       338 non-null    int64  \n",
      " 38  杉类占         338 non-null    int64  \n",
      " 39  松类占         338 non-null    int64  \n",
      " 40  硬阔类占        338 non-null    int64  \n",
      " 41  软阔类占        338 non-null    int64  \n",
      " 42  树种比         338 non-null    object \n",
      " 43  枯立木蓄积       338 non-null    int64  \n",
      " 44  毛竹株数        338 non-null    int64  \n",
      " 45  幼龄毛竹株数      338 non-null    int64  \n",
      " 46  壮龄毛竹株数      338 non-null    int64  \n",
      " 47  杂竹株数        338 non-null    int64  \n",
      " 48  造林类型        0 non-null      float64\n",
      " 49  公益林事权       199 non-null    float64\n",
      " 50  公益林区位类型     199 non-null    float64\n",
      " 51  公益林区域名称     199 non-null    float64\n",
      " 52  群落结构类型      323 non-null    float64\n",
      " 53  林层结构        255 non-null    float64\n",
      " 54  树种结构        256 non-null    float64\n",
      " 55  病虫害等级       42 non-null     float64\n",
      " 56  火灾等级        29 non-null     float64\n",
      " 57  其它灾害等级      76 non-null     float64\n",
      " 58  森林健康等级      312 non-null    float64\n",
      " 59  森林自然度       324 non-null    float64\n",
      " 60  人工林生长等级     32 non-null     float64\n",
      " 61  天然更新等级      87 non-null     float64\n",
      " 62  用材林可及度      10 non-null     float64\n",
      " 63  经营措施类型      325 non-null    float64\n",
      " 64  大等于5CM亩均株数  338 non-null    int64  \n",
      " 65  小于5CM亩均株数   338 non-null    int64  \n",
      " 66  亩平均蓄积       338 non-null    float64\n",
      " 67  散生木株数       338 non-null    int64  \n",
      " 68  森林类别        322 non-null    float64\n",
      " 69  交通区位等级      327 non-null    float64\n",
      " 70  林地质量等级      321 non-null    float64\n",
      " 71  林地保护等级      325 non-null    float64\n",
      " 72  土地退化类型      323 non-null    float64\n",
      " 73  国家级公益林保护等级  85 non-null     float64\n",
      " 74  林带长度        338 non-null    int64  \n",
      " 75  林带宽度        338 non-null    int64  \n",
      " 76  林地功能分区      321 non-null    object \n",
      " 77  主体功能区       321 non-null    float64\n",
      " 78  土地利用现状地类    326 non-null    float64\n",
      " 79  林木生长势       132 non-null    float64\n",
      " 80  灌木覆盖度       338 non-null    int64  \n",
      " 81  灌木平均高       338 non-null    float64\n",
      " 82  其它          338 non-null    int64  \n",
      " 83  重要生态功能区     0 non-null      float64\n",
      " 84  土地使用权       338 non-null    int64  \n",
      " 85  调查时间        332 non-null    object \n",
      " 86  连接字段        338 non-null    object \n",
      "dtypes: float64(44), int64(38), object(5)\n",
      "memory usage: 229.9+ KB\n"
     ]
    }
   ],
   "source": [
    "df.info()  # 数据概览：行数与列数、各列非空值数量、各列数据类型、内存占用"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据清洗"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 缺失值检测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡向</th>\n",
       "      <th>...</th>\n",
       "      <th>主体功能区</th>\n",
       "      <th>土地利用现状地类</th>\n",
       "      <th>林木生长势</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>重要生态功能区</th>\n",
       "      <th>土地使用权</th>\n",
       "      <th>调查时间</th>\n",
       "      <th>连接字段</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>333</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>335</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>336</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>337</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>338 rows × 87 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        id   调查方式    乡镇场    村分场    小班号    细班号   流域名称   地貌类型  平均海拔高     坡向  \\\n",
       "0    False  False  False  False  False  False  False  False  False  False   \n",
       "1    False  False  False  False  False  False  False  False  False  False   \n",
       "2    False   True  False  False  False  False  False  False  False  False   \n",
       "3    False  False  False  False  False  False  False  False  False  False   \n",
       "4    False  False  False  False  False  False  False  False  False  False   \n",
       "..     ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   \n",
       "333  False   True  False  False  False  False  False  False  False  False   \n",
       "334  False   True  False  False  False  False  False  False  False  False   \n",
       "335  False   True  False  False  False  False  False  False  False  False   \n",
       "336  False  False  False  False  False  False  False  False  False  False   \n",
       "337  False   True  False  False  False  False  False  False  False  False   \n",
       "\n",
       "     ...  主体功能区  土地利用现状地类  林木生长势  灌木覆盖度  灌木平均高     其它  重要生态功能区  土地使用权   调查时间  \\\n",
       "0    ...  False     False  False  False  False  False     True  False  False   \n",
       "1    ...  False     False  False  False  False  False     True  False  False   \n",
       "2    ...  False     False  False  False  False  False     True  False  False   \n",
       "3    ...  False     False  False  False  False  False     True  False  False   \n",
       "4    ...  False     False  False  False  False  False     True  False  False   \n",
       "..   ...    ...       ...    ...    ...    ...    ...      ...    ...    ...   \n",
       "333  ...  False     False   True  False  False  False     True  False  False   \n",
       "334  ...  False     False  False  False  False  False     True  False  False   \n",
       "335  ...  False     False   True  False  False  False     True  False  False   \n",
       "336  ...  False     False  False  False  False  False     True  False  False   \n",
       "337  ...  False     False   True  False  False  False     True  False  False   \n",
       "\n",
       "      连接字段  \n",
       "0    False  \n",
       "1    False  \n",
       "2    False  \n",
       "3    False  \n",
       "4    False  \n",
       "..     ...  \n",
       "333  False  \n",
       "334  False  \n",
       "335  False  \n",
       "336  False  \n",
       "337  False  \n",
       "\n",
       "[338 rows x 87 columns]"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isnull()#索引空值判断，有空值返回值为True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡向</th>\n",
       "      <th>...</th>\n",
       "      <th>主体功能区</th>\n",
       "      <th>土地利用现状地类</th>\n",
       "      <th>林木生长势</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>重要生态功能区</th>\n",
       "      <th>土地使用权</th>\n",
       "      <th>调查时间</th>\n",
       "      <th>连接字段</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>333</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>335</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>336</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>337</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>338 rows × 87 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       id   调查方式   乡镇场   村分场   小班号   细班号  流域名称  地貌类型  平均海拔高    坡向  ...  主体功能区  \\\n",
       "0    True   True  True  True  True  True  True  True   True  True  ...   True   \n",
       "1    True   True  True  True  True  True  True  True   True  True  ...   True   \n",
       "2    True  False  True  True  True  True  True  True   True  True  ...   True   \n",
       "3    True   True  True  True  True  True  True  True   True  True  ...   True   \n",
       "4    True   True  True  True  True  True  True  True   True  True  ...   True   \n",
       "..    ...    ...   ...   ...   ...   ...   ...   ...    ...   ...  ...    ...   \n",
       "333  True  False  True  True  True  True  True  True   True  True  ...   True   \n",
       "334  True  False  True  True  True  True  True  True   True  True  ...   True   \n",
       "335  True  False  True  True  True  True  True  True   True  True  ...   True   \n",
       "336  True   True  True  True  True  True  True  True   True  True  ...   True   \n",
       "337  True  False  True  True  True  True  True  True   True  True  ...   True   \n",
       "\n",
       "     土地利用现状地类  林木生长势  灌木覆盖度  灌木平均高    其它  重要生态功能区  土地使用权  调查时间  连接字段  \n",
       "0        True   True   True   True  True    False   True  True  True  \n",
       "1        True   True   True   True  True    False   True  True  True  \n",
       "2        True   True   True   True  True    False   True  True  True  \n",
       "3        True   True   True   True  True    False   True  True  True  \n",
       "4        True   True   True   True  True    False   True  True  True  \n",
       "..        ...    ...    ...    ...   ...      ...    ...   ...   ...  \n",
       "333      True  False   True   True  True    False   True  True  True  \n",
       "334      True   True   True   True  True    False   True  True  True  \n",
       "335      True  False   True   True  True    False   True  True  True  \n",
       "336      True   True   True   True  True    False   True  True  True  \n",
       "337      True  False   True   True  True    False   True  True  True  \n",
       "\n",
       "[338 rows x 87 columns]"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.notnull()#索引空值判断，有空值返回值为False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "id           0\n",
       "调查方式       173\n",
       "乡镇场          0\n",
       "村分场          0\n",
       "小班号          0\n",
       "          ... \n",
       "其它           0\n",
       "重要生态功能区    338\n",
       "土地使用权        0\n",
       "调查时间         6\n",
       "连接字段         0\n",
       "Length: 87, dtype: int64"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of missing values in each column.\n",
    "# DataFrame.sum() is used instead of np.sum(df): np.sum passes axis=None, which newer\n",
    "# pandas versions deprecate for DataFrame reductions (it will reduce to a single scalar).\n",
    "df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "id           0.0%\n",
      "调查方式       51.18%\n",
      "乡镇场          0.0%\n",
      "村分场          0.0%\n",
      "小班号          0.0%\n",
      "            ...  \n",
      "其它           0.0%\n",
      "重要生态功能区    100.0%\n",
      "土地使用权        0.0%\n",
      "调查时间        1.78%\n",
      "连接字段         0.0%\n",
      "Length: 87, dtype: object\n"
     ]
    }
   ],
   "source": [
    "# Missing-value rate per column, rounded to two decimals and rendered as a percentage string.\n",
    "# Series.sum() counts the nulls in one vectorised call instead of iterating the column\n",
    "# element by element with the built-in sum().\n",
    "count_missing = data.apply(lambda col: '{}%'.format(round(100 * col.isnull().sum() / len(col), 2)))\n",
    "print(count_missing)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 缺失值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fill gaps in '调查方式' with the previous valid value; leading gaps, which have no\n",
    "# predecessor, are then filled from the next valid value.\n",
    "# The result is assigned back explicitly: fillna(method=...) is deprecated in recent pandas,\n",
    "# and inplace=True on df['col'] is a chained assignment that may act on a temporary copy\n",
    "# instead of df.\n",
    "# The former mean/mode fillna calls in this cell discarded their results (no effect on df)\n",
    "# and the 'pad'/'backfill' calls were aliases repeating ffill/bfill, so they are dropped.\n",
    "df['调查方式'] = df['调查方式'].ffill().bfill()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      1.0\n",
       "1      1.0\n",
       "2      1.0\n",
       "3      1.0\n",
       "4      1.0\n",
       "      ... \n",
       "333    4.0\n",
       "334    4.0\n",
       "335    4.0\n",
       "336    4.0\n",
       "337    4.0\n",
       "Name: 调查方式, Length: 338, dtype: float64"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the column with missing entries replaced by the column mean.\n",
    "# The filled Series is only displayed; df itself is not modified.\n",
    "df['调查方式'].fillna(df['调查方式'].mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      1.0\n",
       "1      1.0\n",
       "2      1.0\n",
       "3      1.0\n",
       "4      1.0\n",
       "      ... \n",
       "333    4.0\n",
       "334    4.0\n",
       "335    4.0\n",
       "336    4.0\n",
       "337    4.0\n",
       "Name: 调查方式, Length: 338, dtype: float64"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the column with missing entries replaced by its most frequent value\n",
    "# (the first entry returned by mode()). The result is only displayed; df is not modified.\n",
    "df['调查方式'].fillna(value=df['调查方式'].mode().loc[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Forward-fill each gap from the previous valid value in the column, then back-fill any\n",
    "# remaining leading gaps from the next valid value.\n",
    "# Explicit assignment replaces fillna(method=..., inplace=True): the 'method' argument is\n",
    "# deprecated in recent pandas and in-place calls on df['col'] may not write through to df.\n",
    "# 'pad' and 'backfill' were aliases of ffill/bfill, so one call of each is sufficient.\n",
    "df['调查方式'] = df['调查方式'].ffill().bfill()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡度</th>\n",
       "      <th>...</th>\n",
       "      <th>小于5CM亩均株数</th>\n",
       "      <th>亩平均蓄积</th>\n",
       "      <th>散生木株数</th>\n",
       "      <th>林带长度</th>\n",
       "      <th>林带宽度</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>土地使用权</th>\n",
       "      <th>连接字段</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>G360726050609901</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>99</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>484</td>\n",
       "      <td>36</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>9.6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>1.1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>9484092A-A0D4-4F8A-B6E4-4EF65F536117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>G360726050612301</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>123</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>613</td>\n",
       "      <td>38</td>\n",
       "      <td>...</td>\n",
       "      <td>25</td>\n",
       "      <td>9.4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>16</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>DCAED839-5ED1-450D-BDD7-79CE74AB37AB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>G360726050602802</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>28</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>420</td>\n",
       "      <td>32</td>\n",
       "      <td>...</td>\n",
       "      <td>110</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>45</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>acc8ae69-6c3a-41f3-9f1c-25db74ec0488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>G360726050609801</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>98</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>441</td>\n",
       "      <td>35</td>\n",
       "      <td>...</td>\n",
       "      <td>21</td>\n",
       "      <td>9.4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>48B0AABF-2026-4E2C-A686-BBDA04204B45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>G360726050609701</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>97</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>432</td>\n",
       "      <td>37</td>\n",
       "      <td>...</td>\n",
       "      <td>50</td>\n",
       "      <td>10.7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>4F321F37-E249-49D5-A3BA-AB47E7CFC082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>333</th>\n",
       "      <td>G360726060301204</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>270</td>\n",
       "      <td>18</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>35</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0C495930-8DE1-4C80-AD55-8B254FD03B05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334</th>\n",
       "      <td>G360726060301301</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>13</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>350</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>489</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>35</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>E3E6F4B3-0BF9-46CA-9191-2CAEEFFE20EC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>335</th>\n",
       "      <td>G360726060300403</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>310</td>\n",
       "      <td>19</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>177</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>35</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>946F45ED-C89A-42E4-9F67-A48C2D2E5583</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>336</th>\n",
       "      <td>G360726060300902</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>300</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>13</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>1.2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>8C313EF1-7D64-4892-B33E-F8459847F299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>337</th>\n",
       "      <td>G360726060300803</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>290</td>\n",
       "      <td>19</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>152</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>35</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2392C5AE-51D7-4676-98E8-947DF46D7D09</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>338 rows × 48 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                   id  调查方式  乡镇场  村分场  小班号  细班号  流域名称  地貌类型  平均海拔高  坡度  ...  \\\n",
       "0    G360726050609901   1.0    5    6   99    1     1    51    484  36  ...   \n",
       "1    G360726050612301   1.0    5    6  123    1     1    40    613  38  ...   \n",
       "2    G360726050602802   1.0    5    6   28    2     1    51    420  32  ...   \n",
       "3    G360726050609801   1.0    5    6   98    1     1    51    441  35  ...   \n",
       "4    G360726050609701   1.0    5    6   97    1     1    51    432  37  ...   \n",
       "..                ...   ...  ...  ...  ...  ...   ...   ...    ...  ..  ...   \n",
       "333  G360726060301204   4.0    6    3   12    4     1    51    270  18  ...   \n",
       "334  G360726060301301   4.0    6    3   13    1     1    51    350  20  ...   \n",
       "335  G360726060300403   4.0    6    3    4    3     1    51    310  19  ...   \n",
       "336  G360726060300902   4.0    6    3    9    2     1    51    300  20  ...   \n",
       "337  G360726060300803   4.0    6    3    8    3     1    51    290  19  ...   \n",
       "\n",
       "     小于5CM亩均株数  亩平均蓄积  散生木株数  林带长度  林带宽度  灌木覆盖度  灌木平均高  其它  土地使用权  \\\n",
       "0            0    9.6      0     0     0     15    1.1   0      1   \n",
       "1           25    9.4      0     0     0     16    1.5   0      1   \n",
       "2          110    0.0      0     0     0     45    1.6   0      1   \n",
       "3           21    9.4      0     0     0     21    1.5   0      1   \n",
       "4           50   10.7      0     0     0     25    1.5   0      1   \n",
       "..         ...    ...    ...   ...   ...    ...    ...  ..    ...   \n",
       "333          0    0.0      0     0     0     35    1.5   0      1   \n",
       "334          0    0.0    489     0     0     35    1.8   0      1   \n",
       "335          0    0.0    177     0     0     35    1.0   0      1   \n",
       "336         13    9.0      0     0     0     25    1.2   0      1   \n",
       "337          0    0.0    152     0     0     35    1.5   0      1   \n",
       "\n",
       "                                     连接字段  \n",
       "0    9484092A-A0D4-4F8A-B6E4-4EF65F536117  \n",
       "1    DCAED839-5ED1-450D-BDD7-79CE74AB37AB  \n",
       "2    acc8ae69-6c3a-41f3-9f1c-25db74ec0488  \n",
       "3    48B0AABF-2026-4E2C-A686-BBDA04204B45  \n",
       "4    4F321F37-E249-49D5-A3BA-AB47E7CFC082  \n",
       "..                                    ...  \n",
       "333  0C495930-8DE1-4C80-AD55-8B254FD03B05  \n",
       "334  E3E6F4B3-0BF9-46CA-9191-2CAEEFFE20EC  \n",
       "335  946F45ED-C89A-42E4-9F67-A48C2D2E5583  \n",
       "336  8C313EF1-7D64-4892-B33E-F8459847F299  \n",
       "337  2392C5AE-51D7-4676-98E8-947DF46D7D09  \n",
       "\n",
       "[338 rows x 48 columns]"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dropna(axis=1)#删除含缺失值的列"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 重复值检测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡向</th>\n",
       "      <th>...</th>\n",
       "      <th>主体功能区</th>\n",
       "      <th>土地利用现状地类</th>\n",
       "      <th>林木生长势</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>重要生态功能区</th>\n",
       "      <th>土地使用权</th>\n",
       "      <th>调查时间</th>\n",
       "      <th>连接字段</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>G360726050611401</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>114</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>457</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>86335B3F-2AB1-4985-B720-58511B26AD80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>G360726050611401</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>114</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>457</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>86335B3F-2AB1-4985-B720-58511B26AD80</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 87 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  id  调查方式  乡镇场  村分场  小班号  细班号  流域名称  地貌类型  平均海拔高   坡向  ...  \\\n",
       "18  G360726050611401   1.0    5    6  114    1     1    51    457  4.0  ...   \n",
       "19  G360726050611401   1.0    5    6  114    1     1    51    457  4.0  ...   \n",
       "\n",
       "    主体功能区  土地利用现状地类  林木生长势  灌木覆盖度  灌木平均高  其它  重要生态功能区  土地使用权        调查时间  \\\n",
       "18    3.0     301.0    1.0     21    1.3   0      NaN      1  2019-09-27   \n",
       "19    3.0     301.0    1.0     21    1.3   0      NaN      1  2019-09-27   \n",
       "\n",
       "                                    连接字段  \n",
       "18  86335B3F-2AB1-4985-B720-58511B26AD80  \n",
       "19  86335B3F-2AB1-4985-B720-58511B26AD80  \n",
       "\n",
       "[2 rows x 87 columns]"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.duplicated()]#查看重复值数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      False\n",
       "1      False\n",
       "2      False\n",
       "3      False\n",
       "4      False\n",
       "       ...  \n",
       "333    False\n",
       "334    False\n",
       "335    False\n",
       "336    False\n",
       "337    False\n",
       "Length: 338, dtype: bool"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.duplicated()#索引重复行，返回值为True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.duplicated().sum()#重复行总数量"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 重复值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡向</th>\n",
       "      <th>...</th>\n",
       "      <th>主体功能区</th>\n",
       "      <th>土地利用现状地类</th>\n",
       "      <th>林木生长势</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>重要生态功能区</th>\n",
       "      <th>土地使用权</th>\n",
       "      <th>调查时间</th>\n",
       "      <th>连接字段</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>G360726050609901</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>99</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>484</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15</td>\n",
       "      <td>1.1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>9484092A-A0D4-4F8A-B6E4-4EF65F536117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>G360726050612301</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>123</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>613</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>16</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>DCAED839-5ED1-450D-BDD7-79CE74AB37AB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>G360726050602802</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>28</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>420</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>45</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-22</td>\n",
       "      <td>acc8ae69-6c3a-41f3-9f1c-25db74ec0488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>G360726050609801</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>98</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>441</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>48B0AABF-2026-4E2C-A686-BBDA04204B45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>G360726050609701</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>97</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>432</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>25</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-18</td>\n",
       "      <td>4F321F37-E249-49D5-A3BA-AB47E7CFC082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>333</th>\n",
       "      <td>G360726060301204</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>270</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-16</td>\n",
       "      <td>0C495930-8DE1-4C80-AD55-8B254FD03B05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334</th>\n",
       "      <td>G360726060301301</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>13</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>350</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>35</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-16</td>\n",
       "      <td>E3E6F4B3-0BF9-46CA-9191-2CAEEFFE20EC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>335</th>\n",
       "      <td>G360726060300403</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>310</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-18</td>\n",
       "      <td>946F45ED-C89A-42E4-9F67-A48C2D2E5583</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>336</th>\n",
       "      <td>G360726060300902</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>300</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>25</td>\n",
       "      <td>1.2</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-12</td>\n",
       "      <td>8C313EF1-7D64-4892-B33E-F8459847F299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>337</th>\n",
       "      <td>G360726060300803</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>290</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-16</td>\n",
       "      <td>2392C5AE-51D7-4676-98E8-947DF46D7D09</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>336 rows × 87 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                   id  调查方式  乡镇场  村分场  小班号  细班号  流域名称  地貌类型  平均海拔高   坡向  ...  \\\n",
       "0    G360726050609901   1.0    5    6   99    1     1    51    484  1.0  ...   \n",
       "1    G360726050612301   1.0    5    6  123    1     1    40    613  1.0  ...   \n",
       "2    G360726050602802   1.0    5    6   28    2     1    51    420  3.0  ...   \n",
       "3    G360726050609801   1.0    5    6   98    1     1    51    441  3.0  ...   \n",
       "4    G360726050609701   1.0    5    6   97    1     1    51    432  3.0  ...   \n",
       "..                ...   ...  ...  ...  ...  ...   ...   ...    ...  ...  ...   \n",
       "333  G360726060301204   4.0    6    3   12    4     1    51    270  8.0  ...   \n",
       "334  G360726060301301   4.0    6    3   13    1     1    51    350  8.0  ...   \n",
       "335  G360726060300403   4.0    6    3    4    3     1    51    310  4.0  ...   \n",
       "336  G360726060300902   4.0    6    3    9    2     1    51    300  6.0  ...   \n",
       "337  G360726060300803   4.0    6    3    8    3     1    51    290  4.0  ...   \n",
       "\n",
       "     主体功能区  土地利用现状地类  林木生长势  灌木覆盖度  灌木平均高  其它  重要生态功能区  土地使用权        调查时间  \\\n",
       "0      3.0     301.0    1.0     15    1.1   0      NaN      1  2019-09-27   \n",
       "1      3.0     301.0    1.0     16    1.5   0      NaN      1  2019-09-27   \n",
       "2      3.0     301.0    1.0     45    1.6   0      NaN      1  2019-09-22   \n",
       "3      3.0     301.0    1.0     21    1.5   0      NaN      1  2019-09-27   \n",
       "4      3.0     301.0    1.0     25    1.5   0      NaN      1  2019-09-18   \n",
       "..     ...       ...    ...    ...    ...  ..      ...    ...         ...   \n",
       "333    3.0     201.0    NaN     35    1.5   0      NaN      1  2019-09-16   \n",
       "334    3.0     201.0    2.0     35    1.8   0      NaN      1  2019-09-16   \n",
       "335    3.0     201.0    NaN     35    1.0   0      NaN      1  2019-09-18   \n",
       "336    3.0     301.0    2.0     25    1.2   0      NaN      1  2019-09-12   \n",
       "337    3.0     201.0    NaN     35    1.5   0      NaN      1  2019-09-16   \n",
       "\n",
       "                                     连接字段  \n",
       "0    9484092A-A0D4-4F8A-B6E4-4EF65F536117  \n",
       "1    DCAED839-5ED1-450D-BDD7-79CE74AB37AB  \n",
       "2    acc8ae69-6c3a-41f3-9f1c-25db74ec0488  \n",
       "3    48B0AABF-2026-4E2C-A686-BBDA04204B45  \n",
       "4    4F321F37-E249-49D5-A3BA-AB47E7CFC082  \n",
       "..                                    ...  \n",
       "333  0C495930-8DE1-4C80-AD55-8B254FD03B05  \n",
       "334  E3E6F4B3-0BF9-46CA-9191-2CAEEFFE20EC  \n",
       "335  946F45ED-C89A-42E4-9F67-A48C2D2E5583  \n",
       "336  8C313EF1-7D64-4892-B33E-F8459847F299  \n",
       "337  2392C5AE-51D7-4676-98E8-947DF46D7D09  \n",
       "\n",
       "[336 rows x 87 columns]"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates()#删除重复行"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 异常值检测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([<matplotlib.axis.XTick at 0x15464905e10>], [Text(1, 0, '郁闭度')])"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZwAAAFECAYAAAATLIh7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAexAAAHsQEGxWGGAAAKlklEQVR4nO3dX4idiVnH8d+zTTXRRQy9XJfOWm39g2mwBgsuaClMKOIs2V7sRr1aFQkspU1UqCB60dudSGGT6oWwbly6F/UiixcR/1TEP4hX0xvZmiXFXbqiVzaQENM+XkwGhjhn0wlzntmZ/XwghPPMvOc8N8mX9z0v51R3BwCW7ZH9XgCA9wbBAWCE4AAwQnAAGHFkr5/w6aef7pWVlb1+WgAOmIsXL/55d3966/GeB2dlZSXr6+t7/bQAHDAXL178xvbHLqkBMEJwABghOACMEBwARggOACMEB4ARggPACMEBYITgADBCcAAYITgAjBAcAEbs+Yd3wmFy7ty5vPXWW7s+bmNjIydOnHio13zsscdy+fLlhzoW3s0EB97Bw/7Hv7a2lqtXr+7xNnCwuaQGwAjBAWCE4AAwQnAAGCE4AIwQHABGCA4AIwQHgBGCA8AIwQFghOAAMEJwABghOACMEBwARggOACMEB4ARggPACMEBYITgADBCcAAYITgAjBAcAEYIDgAjBAeAEYIDwAjBAWCE4AAw4shOw6paTfJUkje6+4Vt86eT/GSSDyV5rru/M7IlAAfejsFJcjbJc0kuVNXR7r59b34nyRNJHr0/NlV1OsnpM2fOLG1ZeBjPPPNMbt26NfqaGxsbWVtbG33NY8eO5dVXXx19TdiNRcG5091dVTeTHE/yzXvzX0rym0l+o6p+uLvf2Dqgu68luXb+/PnPLXVj2KVbt27l6tWr+73G0k0HDnZr0Xs416vqVJKTSVa3zf8tyZeSfCLJ28tdDYDDZNEZznqSZ5Nc6u6NrWF3XxzZCoBDZ8fgdPfdJFeGdwHgEHNbNAAjBAeAEYIDwAjBAWCE4AAwQnAAGCE4AIwQHABGCA4AIwQHgBGCA8AIwQFghOAAMEJwABghOACMEBwARggOACMEB4ARggPACMEBYITgADBCcAAYITgAjBAcAEYIDgAjBAeAEYIDwAjBAWCE4AAwQnAAGCE4AIwQHABGCA4AIwQHgBGCA8AIwQFghOAAMEJwABghOACMEBwARggOACMEB4ARggPACMEBYITgADBCcAAYITgAjBAcAEYIDgAjBAeAEYIDwAjBAWCE4AAwQnAAGCE4AIzYMThVtVpVL1bVhR1+9jtV9eHlrwbAYbLoDOdskueTVFUd3RpW1WqS40l+/P4Dqup0Va3fuHFjGXsCcMAtCs6d7u4kN7MZmC2/0t2fT/KBqvro9gO6+1p3n19ZWVnOpgAcaIuCc72qTiU5mWR12/wvq+r3kjyZ5OtL3g2AQ+TIgvl6kmeTXOruja1hd//ZyFYAHDo7Bqe77ya5MrwLAIeY26IBGCE4AIwQHABGCA4AIwQHgBGCA8AIwQFghOAAMEJwABghOACMWPRZanBonHv89eSVZ/d7jaU79/jr+70CvCPB4dC7/B8fzqd++cv7vcbSXf7yWj6130vAO3BJDYARggPACMEBYITgADBCcAAYITgAjBAcAEYIDgAjBAeAEYIDwAjBAWCE4AAwQnAAGCE4AIwQHABGCA4AIwQHgBGCA8AIwQFghOAAMEJwABghOACMEBwARggOACMEB4ARggPACMEBYITgADBCcAAYITgAjBAcAEYIDgAjBAeAEYIDwAjBAWCE4AAwQnAAGCE4AIwQHABGCA4AIwQHgBGCA8AIwQFgxI7BqarVqnqxqi7s8LOfrqpXlr8aAIfJojOcs0meT1JVdXRrWFUfTfKzSV6//4CqOl1V6zdu3FjGngAccIuCc6e7O8nNJMe3zX81yfcm+XhV/dT2A7r7WnefX1lZWcqi
ABxsRxbMr1fVqSQnk9xK8lKSdPdvJ0lV/WB3f21kQwAOhUVnOOtJPpLkUne/dP8Pu/sPlrkUAIfPjmc43X03yZXhXQA4xNwWDcAIwQFghOAAMEJwABghOACMEBwARggOACMEB4ARggPACMEBYITgADBCcAAYITgAjBAcAEYIDgAjBAeAEYIDwAjBAWCE4AAwQnAAGCE4AIwQHABGCA4AIwQHgBGCA8AIwQFghOAAMEJwABghOACMEBwARggOACMEB4ARggPACMEBYITgADBCcAAYITgAjBAcAEYIDgAjBAeAEYIDwAjBAWCE4AAwQnAAGCE4AIwQHABGCA4AIwQHgBGCA8AIwQFghOAAMEJwABghOACMEBwARggOACOO7DSsqtUkTyV5o7tf2Db/tSTfn+RHuvszMysCcBgsOsM5m+T5JFVVR7fNX+7uLyZ59P4Dqup0Va3fuHFj77cE4MBbFJw73d1JbiY5vjXs7jtV9dkkL95/QHdf6+7zKysry9gTgANuUXCuV9WpJCeTrG4Nq+ozSX4uySeq6vuWvx4Ah8WO7+EkWU/ybJJL3b2xNbx3Oe2LE4sBcLjsGJzuvpvkyvAuABxibosGYITgADBCcAAYITgAjBAcAEYIDgAjBAeAEYIDwAjBAWCE4AAwQnAAGCE4AIwQHABGCA4AIwQHgBGCA8AIwQFghOAAMEJwABghOACMEBwARggOACMEB4ARggPACMEBYITgADBCcAAYITgAjBAcAEYIDgAjBAeAEYIDwAjBAWCE4AAwQnAAGCE4AIwQHABGCA4AIwQHgBGCA8AIwQFghOAAMEJwABghOACMEBwARggOACMEB4ARggPACMEBYITgADBCcAAYITgAjBAcAEYIDgAjBAeAETsGp6pWq+rFqrrw3cwB4EGOLJifTfJckgtVdbS7bz9gnqo6neT0mTNnlrow7NZrr72WtbW1hzp2Y2Mjt2/ffvAv7qGjR4/mxIkTuz7u2LFjS9gG9s6i4Nzp7q6qm0mOJ/nmA+bp7mtJrp0/f/5zS90Ydqm793sFIIvfw7leVaeSnEyyumD+38tdDYDDZFFw1pN8JMml7n5pwfx/l70cAIfHjpfUuvtukivf7RwAHsRt0QCMEBwARggOACMEB4ARggPACMEBYITgADBCcAAYITgAjKi9/mDDqvpKkm/s6ZPCwfPB+HcAH+zuT2892PPgAElVrXf3+f3eA95NXFKD5bi23wvAu40zHABGOMOBXaqqR6rqif3eAw4awYFdqKpHkjyR5Ber6n1V9UJVfbWq1qrqx+79zv/72o+q+qF7f75QVR+qqseryndC857ikhrsQlV9LMnvJ/mrJP+Z5EeT/F2STnIjybeSvJnkWJJ/SfKnSf4oyVeS/FOSbyd5/72ne6m73x5cH/bVjl/ABiz0ZpITSX4gyWv3Zj+T5L+SfDLJzyf5ejaj8kqSu93dVXU0ydkkd5NUkvcl+fskgsN7huDA7qwm+edsfvPtx7J5xvKtJCtJ/iLJHyb5mySfT/LvSZ6squ9J8o/ZPCt6MpvB+dvu/tfh3WFfCQ7sQne/XFWfTPJbSf4hya0kf5LkepK/TnIxyR/fd9gH7v3e5WwG6tv3HgsO7yluGoDde7u7fyGb0UiSX8/m+zTPJHk5yf8k+c623z+VzbOij2fzfZ1rSb40tSy8WwgO7N4jVfV0Nt/PeX82L6+9ee/vryb5iWwG5gvZ/HibR7N5NeGzSb6WzfdtfreqnppeHPaTu9TgIVRVbd0M0N2393sfOAgEB4ARLqkBMEJwABghOACM+D/p4bqpl/aI9QAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 500x400 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.rcParams['font.sans-serif'] = ['SimHei'] #用来正常显示中文标签\n",
    "plt.rcParams['axes.unicode_minus'] = False   #用来正常显示负号\n",
    "df1 = df['郁闭度']\n",
    "fig = plt.figure(figsize=(10,8),dpi=50) \n",
    "plt.boxplot(df1)         #画箱线图，直接使用DataFrame的方法\n",
    "plt.xticks([1],[\"郁闭度\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Series([], Name: 郁闭度, dtype: float64)"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算下四分位数和上四分位\n",
    "Q1 = df['郁闭度'].quantile(q = 0.25)\n",
    "Q3 = df['郁闭度'].quantile(q = 0.75)\n",
    "\n",
    "# 基于1.5倍的四分位差计算上下须对应的值\n",
    "low_whisker = Q1 - 1.5*(Q3 - Q1)\n",
    "up_whisker = Q3 + 1.5*(Q3 - Q1)\n",
    "\n",
    "# 寻找异常点\n",
    "df['郁闭度'][(df['郁闭度'] > up_whisker) | (df['郁闭度'] < low_whisker)]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.lines.Line2D at 0x1546495ddd8>"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAMEAAAD2CAYAAABvPeOJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAfH0lEQVR4nO2de3Bb133nP4ckSIIUSUCiREoE9bIdSrZk0WPajpLYliV5pWStia1t5e6ozrpJ66bJOLs7rXfjpslOO2nS2ll3O906qRu3bteebJQdWVm5jhxbWr8iO7FUEXpYj9gSKYASJREiQIoASRA4+wdwKRAESTzuvbjAPZ8ZjUmcS5xDk1/e+zvfc75HSClRKOxMRbEHoFAUGyUChe1RIlDYHiUChe1RIlDYnqpiD0CjublZLl++vNjDUJQphw8fHpBSLszUZhkRLF++nEOHDhV7GIoyRQjRO1ObehxS2B4lAoXtUSJQ2B4lAoXtUSJQ2B4lAoXtUSJQ2B7L+ASBQIAXXnhhymu33HILd9xxB9FolJdeemna13R2dtLZ2Uk4HGbXrl3T2ru6ulizZg2hUIiXX355Wvv69evp6OhgYGCAV155ZVr7Pffcw8qVK+nv72ffvn3T2jdt2kR7ezs+n4/9+/dPa9+6dSutra2cPXuWt99+e1r7Aw88QHNzM6dPn+a9996b1v7QQw/R1NTE8ePHM3ooO3bsoK6uju7ubrq7u6e179y5E4fDwQcffMCJEyemtT/66KMAHDx4kDNnzkxpczgc7Ny5E4C33nqLc+fOTWmvq6tjx44dALzxxhv4/f4p7Y2NjWzfvh2Affv20d/fP6V9wYIFbNu2DYC9e/cSCASmtLe2trJ161YAdu/ezdDQ0JR2j8fD5s2bAdi1axfhcHhK+4oVK7j33nsBMv7upKLuBArbI6yyqaarq0sqx1hhFEKIw1LKrkxt6k6gsD1KBArbo0SgsD1KBArbo0SgsD1KBArbo0SgsD15i0AI0SKEeGeWdocQ4hUhxEEhxBfz7UehMJq8RCCEcAP/BNTPctnjwCEp5aeAB4QQDfn0pVAYTb53ghjwMDA0yzUbAG1Bz0FgmlsnhHhMCHFICHHoypUreQ5FoSiMvEQgpRySUobmuKwe6Et+PAS0ZHif56SUXVLKroULMwYBKBSGY2RhfA1wJj+eZ3BfCkXeGPmLeRj4TPLjdUCPgX0pFHmjy34CIcRG4GYp5f9MefmfgFeFEHcDNwO/1KMvhUJvCroTSCk3JP97IE0ASCl7gfuBXwCbpZSxQvpSKIzC0J1lUsoLXJ8hUigsiSpWFbZHiUBhe5QIFLZHiUBhe5QIFLZHiUBhe5QIFLZHiUBhe5QIFLZHiUBhe5QIFLZHiUBheywTza4oPnuO9PH0a6e5EIywxOXkiS0dPHhbW7GHNSeFjluJQAEkfpGe3H2MSDSx4r0vGOHJ3ccALC0EPcatHocUADz92unJXySNSDTG06+dLtKIskOPcSsRKAC4EIzk9LpV0GPcSgQKAJa4nDm9bhUWNdZkfD2XcSsRKAB4YksHTsfUXweno5IntnQUaURz8+tLw4THJqa9nuu4lQgUQKKI/C9bV01+3lBTxXe3r7VsUXy8L8TDz71PbXUV/3VrB20uJwJoczlzHreaHVJM0tJYC4AQsPnmFssK4HDvII/+469orHXw0u/exfLmev5gw415v18hgbzPJ8N2/2SGdrcQ4lUhxDtCiB/kPUKFaXh9QaorK1jnceEfDM/9BUXg4EcDPPL8L1lQX82uL69nefNscbjZkW8g73agMhm2u0QIcVOGyx4BXpRS3g00CCEynhyosA5ef5DVixtYubCevkHrzQodOHWJR1/4AI/bya7fX0+bTkV7vneCDVyPUjnA9aS5VAJAhxDCBbQD5/PsS2ECsbjkmD/EunYXHpeT/qFRorF4sYc1yb8cvchj/3yYjpYGfvzYehYlH930IF8R
zBm2C7wL3AR8DTgFDKZfoFKprcPHV64xMh5jnceFx11HXEJ/aLTYwwLg/xz28/iP/pXOdhcv/d5duOurdX3/fEWQTdjud4AvSyn/jIQIfif9ApVKbR26fUEA1rW7aHMnfrQ+C9QF/+u9Hv7oJ14+dUMz//ylO2msdejeR74iyCZstw5YK4SoBO4CZJ59KUzgqD9IQ00VK5vr8SRF4C9yXfCDtz7mmz89webVi/jhf+iirtqYycx8RbAHeEQI8QywAzghhPh22jXfBZ4DQsB84Ef5DlJhPF5fiLWeJioqBIubnAhB0YpjKSXP/Pw0f/GzUzxw62K+/9u3U+uoNKy/vKQlpRwSQmwgEbj7lJSyH/CmXfMr4JZCB6gwntFojJMXh/i9e1YCUF1VQUtDral3gtTl0HU1lYyMxdjR5eG722+lskIY2nfe9xcp5SAqbLcs+PDiEBNxyTqPa/I1j9tJX9CcmiB9OfTIWIzKCsH6FQsMFwCoZRMK4OhkUdw0+ZrH7TTtTpBpOXQsLvne62dM6V+JQIHXH2JRQw2tKXPvbW4nF0OjTJjgFRR7GbcSgQKvL8i6dhdCXH/08LjriMUll4bHDO+/2Mu4lQhsTigS5ezACJ3trimva0sS/FeNrwue2NJBbRGXcSsR2Jxj/sRJvLd6mqa8bqZX8OBtbTy+8frys3yWQxeCWkptc7z+IAC3trmmvK49ivSZ9Fx+w8LEatBXHv8Ma9qa5rhaX9SdwOZ0+4KsbK6nqW7qcoRaRyULG2pMW1LdE0j0s3RBnSn9paJEYHOO+hNFcSYSXoE5d4LeQJj59dWGrA2aCyUCG9MfGuXS0Ni0ekDD464zzSvoDYywrAh3AVAisDWpK0cz0eZyciEYIR43fu1jbyDM8gWF7xLLByUCG+P1B6mqENy8uDFju8ftJBqTXDbYKxiNxrgQiqg7gcJ8jvqDrF7cOOMKzbbJaVJji2P/YBgpUXcChbnE45KjvtCM9QBAu9ucadKegYTI1J1AYSpnB0YYHpuYsR4AaHMlfimNLo57AiMALFN3AoWZeJNFcfpyiVSc1ZUsqK82/HHo/NUwDbVVuOvMnx4FJQLbctQfpL66khsWzpv1OjOWVPckZ4ZSF/CZiRKBTen2h1jT1jTnppU2t9PwbZbF9AhAicCWjE3EOHlhaNZHIQ2Puw6/gV5BNBbHPxgp2swQKBHYklMXhxmPxWctijU8bifjE3EGRozxCvoGI8TiUt0JFOZyNLlyNBsRTO4rMOiRqNgzQ6BEYEu6fSGa51WzpGnuKEOPO/EX2qi64Hxy087yUrwTzJVKnXLds0KIbfn2o9Afrz/IOo8rq9mYNoM31/QMhHEml20XCyNTqRFC3A20Sin3FjBGhY4Mj0b5+Mq1rB6FAObVVOGqcxjmFWgzQ8WaHgUDU6mFEA7g74EeIcTnM72JCuQ1n2N9IaTMrh7QMHJfQU9gpKgzQ2BsKvUXgA+Bp4A7hRCPp1+gAnnNx+tL7inOYQtjm8sYwywWl/iuRljWXLx6AIxNpb4NeC4Z0fgicF+efSl0xOsLsmxBXU7x5h53HX2DEaTU1yu4GIowHouzbH5p3gmySaX+CFiZ/LgL6M2zL4WOaEVxLnjcTiLRGFdHxnUdy/lA8WeGwNhU6ueB+4QQbwNfAb6X9ygVunB5aJSLodGc6gEwzivQNtcv0+HcsUIwMpV6GPjNQgeo0A9vMmNo3Sx7CDIx6RUEIzkLaDZ6AyNUV1WwWMejl/JBpVLbCK8vSGWF4JYluYnAqB1mPYERls6vo8KE5OnZUI6xjfD6g3S0NOCszu3Aiyang4baKt1d48Tm+uLWA6BEYBuklJPBu/mgd/yKlJLeQJilRZ4ZAiUC29ATCDM0OpFzPaCht1dwZXiMSDTG8iJ7BKBEYBu8c2QMzYXmGuvlFUzODBXZLYYyDuRNPQNricvJE1s6TEs5tiLdviBORyU3LZp9O+VMeNxOro1NEIpEcdUVfo6wtoTa
CjVBWYog/QysvmCEJ3cfA7CtEI76g6xta6KqMr+bf2pUux4i6A2MUFUhJj2IYlKWj0OZzsCKRGM8/drpIo2ouERjcY5fGJo1Y2guNK9Ar7qgJxDG43bmLUo9Kf4IDKDYZ2BZjdP9w4xPZLedciauu8b6eAW9gRGWWqAegDIVQbHPwLIa3VlkDM2Fq85BfXWlLkuqtelRK9QDUKYieGJLB860fE0B/KfNGff+lD1H/UHm11dPPtfngxBCN69gMBxleHTCEjNDUKYiePC2Nr7z0Bq0zUoL6quRJB4L7Ig3mTla6O6tNp2CuKw0MwRlKgKA25fNR0r484fWcPib9/Pbn1zKD989x8GPBoo9NFO5NjbBmcvDOS+fzoTH7aRPh5qg1wIJE6mUrQi6tViR5A//G5+7mZXN9fzhT7yEwtHiDcxkjie3UxZSD2h43E6GRicYGi3s/1/PQBghoH2+NWq0shWB1xekpqqCjtYGIBEu+1cPd3JleIxv/vR4kUdnHlrGUCHToxpaSnWhC+l6AyMsaXJSU5XbQj6jKGsRrGlrwpEyD72u3cXXNt3E//Ve4KfdfbN8dfng9YXwuJ0smFd4pIleZxv3BMJFTZxLpyxFMBGLc/xC5gMovrLhBm5b6uJP9hy3hW/QXcDK0XT02ldw/mrYMvUAlKkIzly6xmg0nvE5uKqygv/xcCexuOQPd3lNOZSuWAxcG6MvGKFTh6IYErNstY6Kgh6HQpEoV0fGLTMzBGUqAm9aUZzOsgX1/LdtN/Pe2QDPv3vOvIGZjJ71AOjjFZy30OpRjbIUwVF/kCanY9bnzh1d7fybm1t4+rXTnLw4ZOLozKPbF6JCwJocMobmos1VWBDXpEdggX0EGmUpgu4szCEhBN/dvpZGp4P//ONuRtMW3JUDXl+QT7Q0UF+j32LhxMk1+dcEmkewdH4ZiCCHQN4WIcSRfPvJlfD4BGcuDWc1L75gXg1P/cZaTvUP899/Xl4rTKWUHM0jY2gu2txOBsNRRsYm8vr6nkCYRQ011FVbZxW/oYG8Sb7H9bQ6wzlxYYhYXGb9w9+4qoWddyXd5I/Lx032XY0wGI5ya7t+j0IwNX4lH84X8eT6mTAskBdACLERGAH6Z2jXPZBX20aYyw//G/92NcsX1PNHu7yEIuXhJqc75nrhKXCatKfI55NlwrBAXiFENfAt4OszvYkRgbxef4glTbUsasg+0Kmuuoq/eriTS8NjfKtM3OR0x1wvPAWk0YXHJ7g8PMbyIifOpWNkIO/Xgb+VUgbz7CMv8o0V6Wx38bWNN/HT7vJwk4/6pzvmetA8r4bqqvy8gt5AcU+unwkjA3k3A18VQrwJdAohfphnX1lzdWSc81fDeTukX72vPNzkiVicY32ZHfNCqagQePKMX5lcPWqBrKFU8i3R9wDvCCGWAJ8FfksI8W0p5eRMkZTyHu1jIcSbUsrfLWikWXC0wOdgzU3+7F+/wxee/yXhaIyLwVHT0yoKScrYc6SP77x6ktFonD1H+ljncek+7ja3E38efyS0mJWl5XAnkFIOkSiO3wfuk1J6UwWQ4foNeY0uR7y+EELA2gL+Ai5bUM+2Wxfz0ZURLgRHkVxPq9hzxPjHJC0poy8Yyblv7WsvDyeOWx0MRw0Zd777CnoDYebXV9PkdOg6nkIpq0Berz/IjQvnMa9Ac+jdDBtvtLQKo+8GMyVl/OneE8y1MexP956YMWVDz3G3uZwMXBsnMh7LKde02CfXz4R1HIsC0bI271u1qOD3uhAcneF14+uEmfoYDEf5j/+7W9f3zJdUr+DGHMK8egNh7lwxX9ex6EHZiKAvGCEwMq7LsuElM6yPMSOtYqa+FzXU8KPHPjnr1/77596ffBRKf089SfUKshXBaDTGhVDEkneCslk7pB1Il2/gbCqZ0iqcjkqe2NJR8Htn03f6U4/TUckff241NyycN+u/P/7calPGre0ryMU19g+GkdJ606NQRncCrz9IdWUFq1ob
C34v7fn5qX2nuBAapb6mkj9/cK0ps0N3rpiPBBprqxgenchpdki7xugM1kUNtTgqRU7TpD0D1ltCrVE2Iuj2Bbl5SSPVVfrc3B68rY0Hb2vjSy98wOlLw3y+c4ku7zsX+09dBmD3Vz7FjYtyd3u1cRtJZYVgcVNuXkHvVe2QPuuJoCweh2JxyfG+kC6JCulsWt2CfzDCmUvXdH/vTOw/eYllC+q4YWF+6dFmkes0aW9ghIbaKtx11poehTIRwUeXrxEejxnikG5anZhteuPkJd3fO53w+AQHPw6waVVLwUFZRuPJMYirJ7l61IrfV1mIoNADKGajpbGWtW1NHEg+phjJu78eYHwiPik8K9PmquPy8BhjE9ltRrKqRwDlIgJ/kIbaKlYY9Ly5cdUi/vX8IIFr06cf9WT/ycs01FRxx3LrzaWno02TzuSppBKNxfEPWnN6FMpIBOs8LsOOAt28ugUp4f+d1mfPQybiccn+U5e5p2OhbsW9keSyr6BvMEIsLi05MwRlIILRaIxTF4cNqQc01rQ10tJYw34D64KjfSEGro2xuQQehSDFK8iiLrgewKtEYAgnLgwxEZeG1AMaQgg2rmrh7TNXsn4GzpUDJy9RIWDDJ0pDBK2NtVRWZOcVnJ+cHlWPQ4agLZ82Yno0lc2rFzEyHuNX564a8v5vnLzM7cvcuOsLPw/MDKoqK2htrM3qcahnIIzTUcnChsKjII2g5EXg9QVpbaylpTH77ZT58KkbmqmpqmD/Sf1niS4EI3x4cYhNq6ftUrU02rGuc6HNDFlxehTKQQR+Y3ZQpeOsruQzNzbzxslLup3lq6G5xKVSD2hkm0Znxc31qZS0CELhKOcGRgytB1Ixyj0uFZc4nTa3k0tDo4xPxGe8JhaX+K5GLFsUQ4mL4GhfEDC+HtAwwj3WXOKNqxZZ9nFhJjxuJ3EJ/aGZvYKLoQjjsbhlp0ehxEWgOcWFbKfMBSPcY80l3lxi9QBk5xVoAbxWnRmCEhdBty/EyoX1NNaatyhLb/e4lFzidDzJk2tm23Svba5fZrGsoVRKVgRSSrz+oG7Z+9mip3tcai5xOq1NtVSI2YO4egMjVFdVsNjg2btCMCyQVwjRJIT4mRDidSHEy8lEOt3oHxrlyvCYaUWxhp7u8bESc4nTqa6qoKWxdlbXuCcwQrvbadiSFj0wMpB3J/CMlPJ+ElmkW/Mf5nSMXDk6G3q6x/tLzCXOxFxR7b0WDOBNx7BAXinls1LK15OfLgSmVZOFBPJ2+0I4KgWrF+ubtZkNernHpeYSZ2I2r0BKSW/AWueTZcKwQF4NIcR6wC2lfD+9rZBAXq8vyOrFjUU5BlQP97hUXeJ02lxO+odGmYhN9wquDI8RicYsdSpNJowM5EUIMR/4G+CLefaTkXhccqwvpHvseLbo4R6XqkucjsftJBaX9A9N9wp6LHg+WSYMC+RNFsK7gCellL159pORswPXuDY2YXo9kEqh7vGBk5dYOr/0XOJ0ZltSfX0JdXneCfYAjwghngF2ACeEEN9Ou+ZLwO3AN4QQbwohHs5/mFPpTmYMdep8CksuFOIeh8cn+MXHATatLj2XOB0tjS5TXdAbGKGyQpgSWlYIhgXySim/L6V0Syk3JP/9uPDhJjjqDzKvpoqVzcX7K1qIe1zKLnE6S1yJ+f9MIugJhPG4nbqfkaA3eY9OSjkopdwlpcx4FJOReH1B1rY1FX3uOV/3uJRd4nRqqipZ1FBDX3D6NGliCbW16wEoQcd4bCLGhxeHiloPaOTjHsfjkgOnS9clzkSm+BVtetTq9QCUoAhOXhwmGpNFrQc08nGPj/WFuDI8xiYd0rOtQiavYDAcZXh0Qt0JjEDbTnlrkaZHU8nHPdZc4vs6ykcEbW4nF0OJRAmNUpkZghIUQbcvyMKGGhY3WWNBVq7ucTm4xOl43E6iMcnl4eteweT5ZEoE+uP1JTKGrDK1mIt7XC4ucTptruleQc9AGCGuT6FamZIS
wdBolI+vjFiiHtDIxT3WplPLqR6AzF5Bb2CEJU1Oah3mL2vJlZISwXF/wiSzQj2QSrbu8f6kS5zLEUelQKYdZr1XwyXxKAQlJoLuyaLYOncCSPgFMLt7XE4ucTq1jkqa51VPiV8phdWjGiUlAq8vyIrmelx11ioqW5tqWdPWOKt7XE4ucSbaUqZJQ5EoV0fGS2JmCEpOBCFdziQzgk2rWmZ1jw+cKh+XOBMel3OyMD4/uXpUiUBXLg2N0j80arl6QGM293hyL/EnysclTseTPOU+HpeTHoF6HNKZYm2nzJbZ3ONJl7jE9w7MhsftZHwizsC1sZLyCKCUROAPUlUhuGVJ4adTGsFs7nE5usTpaPsK/MEIPYEwixpqqKsujXMhS0cEvhCrFjdYet5506rM7nE5usTppHoF50tgc30qJSGCeFxy1B+0bD2g8ekbp7vHF0MJl3jjqvKcFdLQXGP/YNjyAbzplIQIegIjDI1OmB60lSuZ3GNNEKW+l3gu6msSx7N+dOkal4fHlAj0xps0yaxaFKeS7h6Xq0ucCY+7jl98PACUzswQlIoIfCHqqitL4hcp1T0uZ5c4E20uJ5eGEj5JKdUEJVG+e/1B1rQ1UWnhKD+NVPf4Ey0NZe0Sp6OtIQJYqh6H9GN8Is6JC0OmnUGgB5p7/JNDvrJ2idPRRDC/vpomp3lJ4YViaRHsOdLHp//yAOMTcX5yyMeeI31zf5EFqKoQSAk///ASE/E4rx67WOwhmYIvuWzi6sg4n/6LAyXz8zIslTrba2Ziz5E+ntx9jCvDiWfMwXCUJ3cfs/z/2D1H+nj2zY8mP49E4yUx7kLZc6SPF9+/nrHWF4yUzPdtWCp1lsnVM/L0a6eJRKc6r5FojKdfO53PkE0jMe6puZylMO5Cefq104xNlOb3bVgqdTbXzJZKfWGG009met0qlOq4C6WUv28jU6nnvGa2VOqZovusHulXquMulFL+vo1Mpc4quXomntjSgTNtnZDTUckTWzpyG6nJlOq4C6WUv+98fQItlfp9EqnUmR78srlmRh68rQ1IPGteCEZY4nLyxJaOydetSqmOu1BK+fsW+eTrCyEagXeA/cBngd8CfjM1lDfDNZ+UUoZmes+uri556NChnMeiUGSDEOKwlLIrU5uRqdTp18woAIWimOS9bEJKOcj12Z+8r1Eoio2lHWOFwgyUCBS2R4lAYXuUCBS2R4lAYXuUCBS2R4lAYXuUCBS2R4lAYXuUCBS2R4lAYXuUCBS2R4lAYXuUCBS2R4lAYXuUCBS2R4lAYXuUCBS2R4lAYXuUCBS2J2cRZBnE2ySE+JkQ4nUhxMtCiPI9sU5R8uQkghxCdncCz0gp7wf6ga2FDVOhMI5cI1c2MD1k99fpF0kpn035dCFwOf0aSATyAo8BLF26NMehKBT6MKsIhBB/B6SGSd4LPJ/8eAi4cY6vXw+4pZTvZ2qXUj4HPAeJBLosx6xQ6MqsIpBS/n7q50KIvybLkF0hxHzgb4B/V+AYFQpDybUw1kJ2IRGy25PpomQhvAt4UkrZm+kahcIq5CqCPcAjQohngB3AvwghbhZCfDvtui8BtwPfEEK8KYR4uPChKhTGkHMqtRDCDdwPvC2l7NdrICqVWmEks6VS5xzIq0J2FeWGcowVtkeJQGF7lAgUtifvQzr0JhAI8MILL0x57ZZbbuGOO+4gGo3y0ksvTfuazs5OOjs7CYfD7No1vUzp6upizZo1hEIhXn755Wnt69evp6Ojg4GBAV555ZVp7ffccw8rV66kv7+fffv2TWvftGkT7e3t+Hw+9u/fP61969attLa2cvbsWd5+++1p7Q888ADNzc2cPn2a9957b1r7Qw89RFNTE8ePHyfTpMGOHTuoq6uju7ub7u7uae07d+7E4XDwwQcfcOLEiWntjz76KAAHDx7kzJkzU9ocDgc7d+4E4K233uLcuXNT2uvq6tixYwcAb7zxBn6/f0p7Y2Mj27dvB2Dfvn30
90+dQ1mwYAHbtm0DYO/evQQCgSntra2tbN2aWG2ze/duhoaGprR7PB42b94MwK5duwiHw1PaV6xYwb333guQ8XcnFXUnUNievA7uMwI1RaowEt0P7lMoygklAoXtUSJQ2B4lAoXtUSJQ2B4lAoXtUSJQ2B4lAoXtsYxZJoS4Asy0C60ZGDBxOHbvuxy/52VSyoWZGiwjgtkQQhyaye1TfZdPv8XqWz0OKWyPEoHC9pSKCJ5Tfdui36L0XRI1gUJhJKVyJ1AoDEOJQGF7LC+CbKLgDeq3qPHyQogWIcQRM/tM6ftZIcQ2k/t0CyFeFUK8I4T4gZl9W1oEOUTBG0Gx4+W/x/XcV9MQQtwNtEop95rc9SPAi1LKu4EGIYRpXoGlRUDmKHhTkFI+K6V8PfnpjPHyRiCE2AiMkBCfaQghHMDfAz1CiM+b2TcQADqEEC6gHThvVsdWF0E90Jf8eAhoMXsAc8XLG9BfNfAt4Otm9JfGF4APgaeAO4UQj5vY97vATcDXgFPAoFkdW10E18gyCt4IUuLlv2hit18H/lZKGTSxT43bgOeSGbMvAveZ2Pd3gC9LKf+MhAh+x6yOrS6CrKLgjaCI8fKbga8KId4EOoUQPzSx74+AlcmPu5h5QaMR1AFrhRCVwF2AaQaWpc0yIUQj8A6wH/gs8EkpZcikvv+AxF8nb/Kl70spf2xG3yljeFNKucHE/hqAfyDx2OkAfkNK2Tf7V+nW953APwLLgPeAh6SU10zp28oiAOOi4BUKDcuLQKEwGqvXBAqF4SgRKGyPEoHC9igRKGyPEoHC9vx/ycgasDpbYMcAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot 10 randomly sampled '郁闭度' values with a mean +/- 2 standard deviation band.\n",
    "ax1 = plt.subplot(1, 2, 1)  # left-hand panel of a 1x2 grid\n",
    "# Fixed seed so that Restart & Run All reproduces the same sample and the same figure.\n",
    "df1 = df['郁闭度'].sample(10, random_state=42)\n",
    "# Positions 0..len-1 on the x axis, derived from the sample so the two always match in length.\n",
    "x = np.arange(len(df1))\n",
    "ax1.plot(x, df1, marker='o')  # line chart of the sampled values\n",
    "# Compute the sample statistics once and reuse them for both reference lines.\n",
    "sample_mean, sample_std = df1.mean(), df1.std()\n",
    "ax1.axhline(y=sample_mean - 2 * sample_std, linestyle='--', color='gray')\n",
    "# Trailing semicolon hides the Line2D repr in the cell output.\n",
    "ax1.axhline(y=sample_mean + 2 * sample_std, linestyle='--', color='gray');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡向</th>\n",
       "      <th>坡位</th>\n",
       "      <th>...</th>\n",
       "      <th>林带长度</th>\n",
       "      <th>林带宽度</th>\n",
       "      <th>主体功能区</th>\n",
       "      <th>土地利用现状地类</th>\n",
       "      <th>林木生长势</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>重要生态功能区</th>\n",
       "      <th>土地使用权</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.0</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>336.000000</td>\n",
       "      <td>336.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>338.0</td>\n",
       "      <td>338.0</td>\n",
       "      <td>321.000000</td>\n",
       "      <td>326.000000</td>\n",
       "      <td>132.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>338.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>338.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.301775</td>\n",
       "      <td>5.565089</td>\n",
       "      <td>5.964497</td>\n",
       "      <td>39.863905</td>\n",
       "      <td>2.952663</td>\n",
       "      <td>1.0</td>\n",
       "      <td>49.535503</td>\n",
       "      <td>416.284024</td>\n",
       "      <td>5.080357</td>\n",
       "      <td>6.657738</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.950156</td>\n",
       "      <td>261.306748</td>\n",
       "      <td>1.250000</td>\n",
       "      <td>23.976331</td>\n",
       "      <td>1.089941</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.032544</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.903700</td>\n",
       "      <td>0.496480</td>\n",
       "      <td>2.482923</td>\n",
       "      <td>41.217442</td>\n",
       "      <td>2.438102</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.742480</td>\n",
       "      <td>110.947748</td>\n",
       "      <td>2.415640</td>\n",
       "      <td>1.148185</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.322115</td>\n",
       "      <td>64.955685</td>\n",
       "      <td>0.515041</td>\n",
       "      <td>15.080379</td>\n",
       "      <td>0.587571</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.177704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>40.000000</td>\n",
       "      <td>160.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>103.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>9.250000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>350.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>201.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>397.500000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>301.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>1.300000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>56.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>463.750000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>301.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>35.000000</td>\n",
       "      <td>1.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>11.000000</td>\n",
       "      <td>146.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>990.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1005.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>65.000000</td>\n",
       "      <td>2.700000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 82 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             调查方式         乡镇场         村分场         小班号         细班号   流域名称  \\\n",
       "count  338.000000  338.000000  338.000000  338.000000  338.000000  338.0   \n",
       "mean     1.301775    5.565089    5.964497   39.863905    2.952663    1.0   \n",
       "std      0.903700    0.496480    2.482923   41.217442    2.438102    0.0   \n",
       "min      1.000000    5.000000    2.000000    1.000000    1.000000    1.0   \n",
       "25%      1.000000    5.000000    4.000000    9.250000    1.000000    1.0   \n",
       "50%      1.000000    6.000000    6.000000   22.000000    2.000000    1.0   \n",
       "75%      1.000000    6.000000    7.000000   56.000000    4.000000    1.0   \n",
       "max      4.000000    6.000000   11.000000  146.000000   14.000000    1.0   \n",
       "\n",
       "             地貌类型       平均海拔高          坡向          坡位  ...   林带长度   林带宽度  \\\n",
       "count  338.000000  338.000000  336.000000  336.000000  ...  338.0  338.0   \n",
       "mean    49.535503  416.284024    5.080357    6.657738  ...    0.0    0.0   \n",
       "std      3.742480  110.947748    2.415640    1.148185  ...    0.0    0.0   \n",
       "min     40.000000  160.000000    1.000000    1.000000  ...    0.0    0.0   \n",
       "25%     51.000000  350.000000    3.000000    7.000000  ...    0.0    0.0   \n",
       "50%     51.000000  397.500000    5.000000    7.000000  ...    0.0    0.0   \n",
       "75%     51.000000  463.750000    8.000000    7.000000  ...    0.0    0.0   \n",
       "max     51.000000  990.000000    9.000000    7.000000  ...    0.0    0.0   \n",
       "\n",
       "            主体功能区     土地利用现状地类       林木生长势       灌木覆盖度       灌木平均高     其它  \\\n",
       "count  321.000000   326.000000  132.000000  338.000000  338.000000  338.0   \n",
       "mean     2.950156   261.306748    1.250000   23.976331    1.089941    0.0   \n",
       "std      0.322115    64.955685    0.515041   15.080379    0.587571    0.0   \n",
       "min      1.000000   103.000000    1.000000    0.000000    0.000000    0.0   \n",
       "25%      3.000000   201.000000    1.000000   15.000000    1.000000    0.0   \n",
       "50%      3.000000   301.000000    1.000000   25.000000    1.300000    0.0   \n",
       "75%      3.000000   301.000000    1.000000   35.000000    1.500000    0.0   \n",
       "max      4.000000  1005.000000    4.000000   65.000000    2.700000    0.0   \n",
       "\n",
       "       重要生态功能区       土地使用权  \n",
       "count      0.0  338.000000  \n",
       "mean       NaN    1.032544  \n",
       "std        NaN    0.177704  \n",
       "min        NaN    1.000000  \n",
       "25%        NaN    1.000000  \n",
       "50%        NaN    1.000000  \n",
       "75%        NaN    1.000000  \n",
       "max        NaN    2.000000  \n",
       "\n",
       "[8 rows x 82 columns]"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics (count, mean, std, min, quartiles, max) of the frame's columns.\n",
    "df.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 异常值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "# IQR (box-plot) rule for '郁闭度'.\n",
    "# Lower and upper quartiles.\n",
    "Q1 = df['郁闭度'].quantile(q=0.25)\n",
    "Q3 = df['郁闭度'].quantile(q=0.75)\n",
    "\n",
    "# Whisker limits: 1.5 * IQR below Q1 and above Q3.\n",
    "low_whisker = Q1 - 1.5 * (Q3 - Q1)\n",
    "up_whisker = Q3 + 1.5 * (Q3 - Q1)\n",
    "\n",
    "# True where the value lies inside [low_whisker, up_whisker]; NaN compares as False.\n",
    "countdata = df['郁闭度'].between(left=low_whisker, right=up_whisker)\n",
    "# Row labels whose value is out of range or missing.\n",
    "index_list = df.index[~countdata].tolist()\n",
    "\n",
    "# The previous `df['郁闭度'].drop(..., inplace=True)` only shortened a temporary Series and\n",
    "# never touched `df`, so outliers stayed in the column and skewed the fill value.\n",
    "# Blank out everything outside the whiskers, then fill the gaps with the mean of the\n",
    "# in-range values and assign the column back (no chained in-place mutation).\n",
    "inliers = df['郁闭度'].where(countdata)\n",
    "df['郁闭度'] = inliers.fillna(inliers.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡向</th>\n",
       "      <th>...</th>\n",
       "      <th>主体功能区</th>\n",
       "      <th>土地利用现状地类</th>\n",
       "      <th>林木生长势</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>重要生态功能区</th>\n",
       "      <th>土地使用权</th>\n",
       "      <th>调查时间</th>\n",
       "      <th>连接字段</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>G360726050609901</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>99</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>484</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15</td>\n",
       "      <td>1.1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>9484092A-A0D4-4F8A-B6E4-4EF65F536117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>G360726050612301</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>123</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>613</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>16</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>DCAED839-5ED1-450D-BDD7-79CE74AB37AB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>G360726050602802</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>28</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>420</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>45</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-22</td>\n",
       "      <td>acc8ae69-6c3a-41f3-9f1c-25db74ec0488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>G360726050609801</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>98</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>441</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>48B0AABF-2026-4E2C-A686-BBDA04204B45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>G360726050609701</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>97</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>432</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>25</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-18</td>\n",
       "      <td>4F321F37-E249-49D5-A3BA-AB47E7CFC082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>333</th>\n",
       "      <td>G360726060301204</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>270</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-16</td>\n",
       "      <td>0C495930-8DE1-4C80-AD55-8B254FD03B05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334</th>\n",
       "      <td>G360726060301301</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>13</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>350</td>\n",
       "      <td>8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>35</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-16</td>\n",
       "      <td>E3E6F4B3-0BF9-46CA-9191-2CAEEFFE20EC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>335</th>\n",
       "      <td>G360726060300403</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>310</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-18</td>\n",
       "      <td>946F45ED-C89A-42E4-9F67-A48C2D2E5583</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>336</th>\n",
       "      <td>G360726060300902</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>300</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>25</td>\n",
       "      <td>1.2</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-12</td>\n",
       "      <td>8C313EF1-7D64-4892-B33E-F8459847F299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>337</th>\n",
       "      <td>G360726060300803</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>290</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-16</td>\n",
       "      <td>2392C5AE-51D7-4676-98E8-947DF46D7D09</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>338 rows × 86 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                   id  调查方式  乡镇场  村分场  小班号  细班号  流域名称  地貌类型  平均海拔高   坡向  ...  \\\n",
       "0    G360726050609901   1.0    5    6   99    1     1    51    484  1.0  ...   \n",
       "1    G360726050612301   1.0    5    6  123    1     1    40    613  1.0  ...   \n",
       "2    G360726050602802   1.0    5    6   28    2     1    51    420  3.0  ...   \n",
       "3    G360726050609801   1.0    5    6   98    1     1    51    441  3.0  ...   \n",
       "4    G360726050609701   1.0    5    6   97    1     1    51    432  3.0  ...   \n",
       "..                ...   ...  ...  ...  ...  ...   ...   ...    ...  ...  ...   \n",
       "333  G360726060301204   4.0    6    3   12    4     1    51    270  8.0  ...   \n",
       "334  G360726060301301   4.0    6    3   13    1     1    51    350  8.0  ...   \n",
       "335  G360726060300403   4.0    6    3    4    3     1    51    310  4.0  ...   \n",
       "336  G360726060300902   4.0    6    3    9    2     1    51    300  6.0  ...   \n",
       "337  G360726060300803   4.0    6    3    8    3     1    51    290  4.0  ...   \n",
       "\n",
       "     主体功能区  土地利用现状地类  林木生长势  灌木覆盖度  灌木平均高  其它  重要生态功能区  土地使用权        调查时间  \\\n",
       "0      3.0     301.0    1.0     15    1.1   0      NaN      1  2019-09-27   \n",
       "1      3.0     301.0    1.0     16    1.5   0      NaN      1  2019-09-27   \n",
       "2      3.0     301.0    1.0     45    1.6   0      NaN      1  2019-09-22   \n",
       "3      3.0     301.0    1.0     21    1.5   0      NaN      1  2019-09-27   \n",
       "4      3.0     301.0    1.0     25    1.5   0      NaN      1  2019-09-18   \n",
       "..     ...       ...    ...    ...    ...  ..      ...    ...         ...   \n",
       "333    3.0     201.0    NaN     35    1.5   0      NaN      1  2019-09-16   \n",
       "334    3.0     201.0    2.0     35    1.8   0      NaN      1  2019-09-16   \n",
       "335    3.0     201.0    NaN     35    1.0   0      NaN      1  2019-09-18   \n",
       "336    3.0     301.0    2.0     25    1.2   0      NaN      1  2019-09-12   \n",
       "337    3.0     201.0    NaN     35    1.5   0      NaN      1  2019-09-16   \n",
       "\n",
       "                                     连接字段  \n",
       "0    9484092A-A0D4-4F8A-B6E4-4EF65F536117  \n",
       "1    DCAED839-5ED1-450D-BDD7-79CE74AB37AB  \n",
       "2    acc8ae69-6c3a-41f3-9f1c-25db74ec0488  \n",
       "3    48B0AABF-2026-4E2C-A686-BBDA04204B45  \n",
       "4    4F321F37-E249-49D5-A3BA-AB47E7CFC082  \n",
       "..                                    ...  \n",
       "333  0C495930-8DE1-4C80-AD55-8B254FD03B05  \n",
       "334  E3E6F4B3-0BF9-46CA-9191-2CAEEFFE20EC  \n",
       "335  946F45ED-C89A-42E4-9F67-A48C2D2E5583  \n",
       "336  8C313EF1-7D64-4892-B33E-F8459847F299  \n",
       "337  2392C5AE-51D7-4676-98E8-947DF46D7D09  \n",
       "\n",
       "[338 rows x 86 columns]"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the frame without the '郁闭度' column; the result is not assigned, so `df` is unchanged.\n",
    "df.drop(columns='郁闭度')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('float64')"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# dtype of the '郁闭度' column\n",
    "df.dtypes['郁闭度']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3-sigma rule for '郁闭度'.\n",
    "# NOTE: despite the names, Q1 holds the column mean and Q3 its standard deviation;\n",
    "# the names are kept so that any cell reading them afterwards still works.\n",
    "Q1 = df['郁闭度'].mean()\n",
    "Q3 = df['郁闭度'].std(ddof=0)  # ddof=0 matches what np.std() returned here\n",
    "\n",
    "# Accepted interval: mean +/- 3 standard deviations.\n",
    "low_whisker = Q1 - 3 * Q3\n",
    "up_whisker = Q1 + 3 * Q3\n",
    "\n",
    "# True where the value lies inside the interval; NaN compares as False.\n",
    "countdata = df['郁闭度'].between(left=low_whisker, right=up_whisker)\n",
    "# Row labels whose value is out of range or missing.\n",
    "index_list = df.index[~countdata].tolist()\n",
    "\n",
    "# The previous `df['郁闭度'].drop(..., inplace=True)` only shortened a temporary Series and\n",
    "# never touched `df`, so outliers stayed in the column and skewed the fill value.\n",
    "# Blank out everything outside the interval, then fill the gaps with the mean of the\n",
    "# in-range values and assign the column back (no chained in-place mutation).\n",
    "inliers = df['郁闭度'].where(countdata)\n",
    "df['郁闭度'] = inliers.fillna(inliers.mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据规整化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>调查方式</th>\n",
       "      <th>乡镇场</th>\n",
       "      <th>村分场</th>\n",
       "      <th>小班号</th>\n",
       "      <th>细班号</th>\n",
       "      <th>流域名称</th>\n",
       "      <th>地貌类型</th>\n",
       "      <th>平均海拔高</th>\n",
       "      <th>坡向</th>\n",
       "      <th>...</th>\n",
       "      <th>主体功能区</th>\n",
       "      <th>土地利用现状地类</th>\n",
       "      <th>林木生长势</th>\n",
       "      <th>灌木覆盖度</th>\n",
       "      <th>灌木平均高</th>\n",
       "      <th>其它</th>\n",
       "      <th>重要生态功能区</th>\n",
       "      <th>土地使用权</th>\n",
       "      <th>调查时间</th>\n",
       "      <th>连接字段</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>G360726050609901</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>99</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>484</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15</td>\n",
       "      <td>1.1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>9484092A-A0D4-4F8A-B6E4-4EF65F536117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>G360726050612301</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>123</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>613</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>16</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>DCAED839-5ED1-450D-BDD7-79CE74AB37AB</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>G360726050602802</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>28</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>420</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>45</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-22</td>\n",
       "      <td>acc8ae69-6c3a-41f3-9f1c-25db74ec0488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>G360726050609801</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>98</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>441</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-27</td>\n",
       "      <td>48B0AABF-2026-4E2C-A686-BBDA04204B45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>G360726050609701</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>97</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>432</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>25</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-18</td>\n",
       "      <td>4F321F37-E249-49D5-A3BA-AB47E7CFC082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>G360726050612701</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>127</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>500</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>21</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-30</td>\n",
       "      <td>503F1608-BE75-4A34-809D-CBE4C5E1620C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>G360726050602301</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>23</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>400</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>25</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-18</td>\n",
       "      <td>564D93AA-0E59-4189-BDD6-E672493509AE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>G360726050614201</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>142</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>333</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>55</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-19</td>\n",
       "      <td>2AE4BB51-D868-4B7D-90C9-91AC9FEB2AE4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>G360726050601302</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>13</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>371</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>301.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>30</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-20</td>\n",
       "      <td>C4DE3B4B-D307-4B8E-B649-C3EC5632C6B1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>G360726050605403</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>54</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>395</td>\n",
       "      <td>5.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>21</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2019-09-30</td>\n",
       "      <td>519BB458-284B-44F0-BD6F-503B107367AE</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 87 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 id  调查方式  乡镇场  村分场  小班号  细班号  流域名称  地貌类型  平均海拔高   坡向  ...  \\\n",
       "0  G360726050609901   1.0    5    6   99    1     1    51    484  1.0  ...   \n",
       "1  G360726050612301   1.0    5    6  123    1     1    40    613  1.0  ...   \n",
       "2  G360726050602802   1.0    5    6   28    2     1    51    420  3.0  ...   \n",
       "3  G360726050609801   1.0    5    6   98    1     1    51    441  3.0  ...   \n",
       "4  G360726050609701   1.0    5    6   97    1     1    51    432  3.0  ...   \n",
       "5  G360726050612701   1.0    5    6  127    1     1    40    500  1.0  ...   \n",
       "6  G360726050602301   1.0    5    6   23    1     1    51    400  3.0  ...   \n",
       "7  G360726050614201   1.0    5    6  142    1     1    51    333  3.0  ...   \n",
       "8  G360726050601302   1.0    5    6   13    2     1    51    371  6.0  ...   \n",
       "9  G360726050605403   1.0    5    6   54    3     1    51    395  5.0  ...   \n",
       "\n",
       "   主体功能区  土地利用现状地类  林木生长势  灌木覆盖度  灌木平均高  其它  重要生态功能区  土地使用权        调查时间  \\\n",
       "0    3.0     301.0    1.0     15    1.1   0      NaN      1  2019-09-27   \n",
       "1    3.0     301.0    1.0     16    1.5   0      NaN      1  2019-09-27   \n",
       "2    3.0     301.0    1.0     45    1.6   0      NaN      1  2019-09-22   \n",
       "3    3.0     301.0    1.0     21    1.5   0      NaN      1  2019-09-27   \n",
       "4    3.0     301.0    1.0     25    1.5   0      NaN      1  2019-09-18   \n",
       "5    3.0     301.0    1.0     21    1.5   0      NaN      1  2019-09-30   \n",
       "6    3.0     301.0    1.0     25    1.5   0      NaN      1  2019-09-18   \n",
       "7    3.0     201.0    NaN     55    1.5   0      NaN      1  2019-09-19   \n",
       "8    3.0     301.0    1.0     30    1.5   0      NaN      1  2019-09-20   \n",
       "9    3.0     201.0    NaN     21    1.4   0      NaN      1  2019-09-30   \n",
       "\n",
       "                                   连接字段  \n",
       "0  9484092A-A0D4-4F8A-B6E4-4EF65F536117  \n",
       "1  DCAED839-5ED1-450D-BDD7-79CE74AB37AB  \n",
       "2  acc8ae69-6c3a-41f3-9f1c-25db74ec0488  \n",
       "3  48B0AABF-2026-4E2C-A686-BBDA04204B45  \n",
       "4  4F321F37-E249-49D5-A3BA-AB47E7CFC082  \n",
       "5  503F1608-BE75-4A34-809D-CBE4C5E1620C  \n",
       "6  564D93AA-0E59-4189-BDD6-E672493509AE  \n",
       "7  2AE4BB51-D868-4B7D-90C9-91AC9FEB2AE4  \n",
       "8  C4DE3B4B-D307-4B8E-B649-C3EC5632C6B1  \n",
       "9  519BB458-284B-44F0-BD6F-503B107367AE  \n",
       "\n",
       "[10 rows x 87 columns]"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(n=10)  # preview the first ten rows of the dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 字符串一致化处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      9484092a-a0d4-4f8a-b6e4-4ef65f536117\n",
       "1      dcaed839-5ed1-450d-bdd7-79ce74ab37ab\n",
       "2      acc8ae69-6c3a-41f3-9f1c-25db74ec0488\n",
       "3      48b0aabf-2026-4e2c-a686-bbda04204b45\n",
       "4      4f321f37-e249-49d5-a3ba-ab47e7cfc082\n",
       "                       ...                 \n",
       "333    0c495930-8de1-4c80-ad55-8b254fd03b05\n",
       "334    e3e6f4b3-0bf9-46ca-9191-2caeeffe20ec\n",
       "335    946f45ed-c89a-42e4-9f67-a48c2d2e5583\n",
       "336    8c313ef1-7d64-4892-b33e-f8459847f299\n",
       "337    2392c5ae-51d7-4676-98e8-947df46d7d09\n",
       "Name: 连接字段, Length: 338, dtype: object"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Lower-case every value with the vectorized .str accessor, which passes missing\n",
    "# values through instead of raising the way map(str.lower) does on a float NaN.\n",
    "# The result is only displayed; df itself is not modified here.\n",
    "df['连接字段'].str.lower()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      9484092A-A0D4-4F8A-B6E4-4EF65F536117\n",
       "1      DCAED839-5ED1-450D-BDD7-79CE74AB37AB\n",
       "2      ACC8AE69-6C3A-41F3-9F1C-25DB74EC0488\n",
       "3      48B0AABF-2026-4E2C-A686-BBDA04204B45\n",
       "4      4F321F37-E249-49D5-A3BA-AB47E7CFC082\n",
       "                       ...                 \n",
       "333    0C495930-8DE1-4C80-AD55-8B254FD03B05\n",
       "334    E3E6F4B3-0BF9-46CA-9191-2CAEEFFE20EC\n",
       "335    946F45ED-C89A-42E4-9F67-A48C2D2E5583\n",
       "336    8C313EF1-7D64-4892-B33E-F8459847F299\n",
       "337    2392C5AE-51D7-4676-98E8-947DF46D7D09\n",
       "Name: 连接字段, Length: 338, dtype: object"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Upper-case every value with the vectorized .str accessor, which passes missing\n",
    "# values through instead of raising the way map(str.upper) does on a float NaN.\n",
    "# The result is only displayed; df itself is not modified here.\n",
    "df['连接字段'].str.upper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      9484092a-a0d4-4f8a-b6e4-4ef65f536117\n",
       "1      Dcaed839-5ed1-450d-bdd7-79ce74ab37ab\n",
       "2      Acc8ae69-6c3a-41f3-9f1c-25db74ec0488\n",
       "3      48b0aabf-2026-4e2c-a686-bbda04204b45\n",
       "4      4f321f37-e249-49d5-a3ba-ab47e7cfc082\n",
       "                       ...                 \n",
       "333    0c495930-8de1-4c80-ad55-8b254fd03b05\n",
       "334    E3e6f4b3-0bf9-46ca-9191-2caeeffe20ec\n",
       "335    946f45ed-c89a-42e4-9f67-a48c2d2e5583\n",
       "336    8c313ef1-7d64-4892-b33e-f8459847f299\n",
       "337    2392c5ae-51d7-4676-98e8-947df46d7d09\n",
       "Name: 连接字段, Length: 338, dtype: object"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Capitalize each value (first character upper-cased, the rest lower-cased) and\n",
    "# write the result back into the column. The .str accessor tolerates missing\n",
    "# values, whereas map(str.capitalize) raises on a float NaN.\n",
    "df['连接字段'] = df['连接字段'].str.capitalize()\n",
    "df['连接字段']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0      9484092a-a0d4-4f8a-b6e4-4ef65f536117\n",
      "1      Dcaed839-5ed1-450d-bdd7-79ce74ab37ab\n",
      "2      Acc8ae69-6c3a-41f3-9f1c-25db74ec0488\n",
      "3      48b0aabf-2026-4e2c-a686-bbda04204b45\n",
      "4      4f321f37-e249-49d5-a3ba-ab47e7cfc082\n",
      "                       ...                 \n",
      "333    0c495930-8de1-4c80-ad55-8b254fd03b05\n",
      "334    E3e6f4b3-0bf9-46ca-9191-2caeeffe20ec\n",
      "335    946f45ed-c89a-42e4-9f67-a48c2d2e5583\n",
      "336    8c313ef1-7d64-4892-b33e-f8459847f299\n",
      "337    2392c5ae-51d7-4676-98e8-947df46d7d09\n",
      "Name: 连接字段, Length: 338, dtype: object\n"
     ]
    }
   ],
   "source": [
    "# Remove every space character inside each string. Series.replace(' ', '') only\n",
    "# swaps cells whose entire value equals ' ', so the substring replacement has to\n",
    "# go through the .str accessor.\n",
    "df['连接字段'] = df['连接字段'].str.replace(' ', '')\n",
    "print(df['连接字段'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      9484092a-a0d4-4f8a-b6e4-4ef65f536117\n",
       "1      Dcaed839-5ed1-450d-bdd7-79ce74ab37ab\n",
       "2      Acc8ae69-6c3a-41f3-9f1c-25db74ec0488\n",
       "3      48b0aabf-2026-4e2c-a686-bbda04204b45\n",
       "4      4f321f37-e249-49d5-a3ba-ab47e7cfc082\n",
       "                       ...                 \n",
       "333    0c495930-8de1-4c80-ad55-8b254fd03b05\n",
       "334    E3e6f4b3-0bf9-46ca-9191-2caeeffe20ec\n",
       "335    946f45ed-c89a-42e4-9f67-a48c2d2e5583\n",
       "336    8c313ef1-7d64-4892-b33e-f8459847f299\n",
       "337    2392c5ae-51d7-4676-98e8-947df46d7d09\n",
       "Name: 连接字段, Length: 338, dtype: object"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['连接字段'].str.strip()  # trim leading/trailing whitespace; result is displayed only, not assigned back"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据类型转换"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "id          object\n",
       "调查方式       float64\n",
       "乡镇场          int64\n",
       "村分场          int64\n",
       "小班号          int64\n",
       "            ...   \n",
       "其它           int64\n",
       "重要生态功能区    float64\n",
       "土地使用权        int64\n",
       "调查时间        object\n",
       "连接字段        object\n",
       "Length: 87, dtype: object"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes  # show the dtype of every column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      3610.0\n",
       "1      1694.0\n",
       "2         0.0\n",
       "3      2489.0\n",
       "4      2341.0\n",
       "        ...  \n",
       "333       0.0\n",
       "334      24.0\n",
       "335       9.0\n",
       "336     799.0\n",
       "337       8.0\n",
       "Name: 活立木总蓄积, Length: 338, dtype: float64"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the column cast to float; the cast is not written back to df.\n",
    "df['活立木总蓄积'].astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('O')"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['调查时间'].dtype  # dtype of the survey-date column before any conversion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0      2019-09-27\n",
      "1      2019-09-27\n",
      "2      2019-09-22\n",
      "3      2019-09-27\n",
      "4      2019-09-18\n",
      "          ...    \n",
      "333    2019-09-16\n",
      "334    2019-09-16\n",
      "335    2019-09-18\n",
      "336    2019-09-12\n",
      "337    2019-09-16\n",
      "Name: 调查时间, Length: 338, dtype: object\n"
     ]
    }
   ],
   "source": [
    "# Parse the survey date and store it back in the same column. The original\n",
    "# assignment targeted a column named '' so the source column stayed as object.\n",
    "# The values look like 'YYYY-MM-DD', so the format uses hyphens, not slashes.\n",
    "df[\"调查时间\"] = pd.to_datetime(df[\"调查时间\"], format=\"%Y-%m-%d\")\n",
    "print(df[\"调查时间\"])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
